Aim:
Objectives:
Specific research questions:
# Import the necessary packages.
# Library to handle pathnames
import glob
# Library to handle date & times
import datetime
import pytz
# The matplotlib library is used to visualize data.
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.style
import matplotlib.dates as mdates
import matplotlib.cm as cm
from matplotlib.colors import BoundaryNorm
from matplotlib.ticker import MaxNLocator
# Numpy is the numerical library. Many of the libraries above already use it, but we import it explicitly just in case.
import numpy as np
from numpy.polynomial.polynomial import polyfit
# From Numpy we also import the nan 'missing-value' object, which we use a lot.
from numpy import nan
# The os library is used to list and access files and directories on the hard drive.
import os
# Used to manipulate data.
import pandas as pd
# Used to plot data.
import seaborn as sns
sns.set_theme(style="darkgrid")
# The pathlib library is used to manage access to disk. It helps us to keep things the same between Windows and Linux.
from pathlib import Path
# Used for statistical analysis such as boxplots and histograms.
from scipy import interpolate
from scipy.stats import linregress
# Used to create windroses or pollution plots.
from windrose import WindroseAxes
# For fancy matching of strings, we use regular expressions with re.
import re
# String library for temporary file import.
from io import StringIO
# Use for scientific statistics
from sklearn.linear_model import LinearRegression
# Create tooltips for interactive plots.
import mpld3
# For connecting to google sheet.
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import ipywidgets as widgets
from ipywidgets import interact, interact_manual
# Handle date time conversions between pandas and matplotlib.
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
# Time zones used in this notebook.
timezonesa = pytz.timezone("Africa/Johannesburg")
timezoneutc = pytz.timezone("UTC")

# --- Google Sheets connection ---
# Scope requested for the service account.
scope = ['https://spreadsheets.google.com/feeds']
# The credentials come from a Service Account JSON key file. To create one:
#   1. Go to Google Development Console (https://console.developers.google.com/project)
#   2. Create new project
#   3. Go to "APIs & Services > Credentials" and choose "Create credentials > Service account key".
#   4. Fill out the form
#   5. Click "Create key"
#   6. Select "JSON" and click "Create"
credentials = ServiceAccountCredentials.from_json_keyfile_name(
    'key/crgprojects-b66f872bd816.json', scope)
# Authorise the gspread client for this notebook.
gc = gspread.authorize(credentials)
# The spreadsheet ID, which can be taken from the link to the sheet:
#   https://docs.google.com/spreadsheets/d/1Fe1P2QEiCjl7KK6YEckzGLC_VqOuGtaChaDGbn2SOhE/edit?usp=sharing
# Alternative sheet (disabled):
#   spreadsheet_key = '1pwJDzIl1O8gQSHD8SekW6442rrLRgLVTYA90dQb7chg'
spreadsheet_key = '1Fe1P2QEiCjl7KK6YEckzGLC_VqOuGtaChaDGbn2SOhE'
#Opening the worksheet by using Worksheet ID
def readSheet():
    """Fetch the 'LCS-QC' worksheet and its contents.

    Opens the workbook identified by the module-level ``spreadsheet_key``
    through the authorised client ``gc``.

    Returns:
        tuple: ``(reports, Reports)`` - the worksheet object, and a DataFrame
        built from all of its cell values with the first row used as the
        column names.
    """
    # Select the sheet the data is pulled from.
    sheet = gc.open_by_key(spreadsheet_key).worksheet('LCS-QC')
    # Pull every cell and turn the rows below the header into a data frame.
    rows = sheet.get_all_values()
    table = pd.DataFrame(rows[1:], columns=rows[0])
    # Optional site filter (disabled):
    # table = table[table['What is the site'] == 'Northam AQMS']
    return (sheet, table)
reports, Reports = readSheet()
# This 'widget' is used to give us interactive graph capabilities in the matplotlib figures.
%matplotlib widget
# This function prints the header of a file without importing
def head(filename,N=10):
    """Print the first ``N`` lines of a text file to stdout.

    Args:
        filename: Path (str or path-like) of the file to preview.
        N: Maximum number of lines to print. Zero or a negative value
            prints nothing.

    The file is read lazily and reading stops once ``N`` lines have been
    printed, so previewing a large file does not load all of it.
    """
    with open(Path(filename)) as fp:
        for n, line in enumerate(fp):
            if n >= N:
                break
            # Lines keep their own newline, so suppress print's.
            print(line, end="")
# Print the tail of a file without importing
def tail(filename,N=10):
    """Print the last ``N`` lines of a text file to stdout.

    Args:
        filename: Path (str or path-like) of the file to preview.
        N: Maximum number of trailing lines to print. Zero or a negative
            value prints nothing.

    Only the most recent ``N`` lines are kept while the file is scanned, so
    memory use does not grow with the size of the file.
    """
    from collections import deque

    with open(Path(filename)) as fp:
        # A bounded deque discards older lines as newer ones arrive.
        last_lines = deque(fp, maxlen=N) if N > 0 else ()
    for line in last_lines:
        # Lines keep their own newline, so suppress print's.
        print(line, end="")
# Format to numeric
def ConvertNumeric(df,var):
    """Convert column ``var`` of ``df`` to a numeric dtype, in place.

    Values that cannot be parsed as numbers become NaN. The frame is
    modified directly and nothing is returned.
    """
    numeric_values = pd.to_numeric(df[var], errors='coerce')
    df[var] = numeric_values
# Print the number of columns in file
def ncolumnsLogger(filename):
    """Return the median number of comma-separated fields per line of a file.

    Each line is split on ',' and the field counts of all lines are reduced
    with ``numpy.median``, so the result is a NumPy float.
    """
    delimiter = ","
    with open(filename) as fp:
        field_counts = [len(line.split(delimiter)) for line in fp]
    return np.median(np.array(field_counts))
# Recursively list all the files under the specified directory whose names contain the given extension.
def lsR(dir,pattern=".csv",prefix="",postfix=""):
    """Recursively list the files under ``dir`` whose names match the filters.

    Args:
        dir: Directory to walk (str or path-like).
        pattern: Substring that must occur somewhere in the file name.
        prefix: Required start of the file name ('' accepts any).
        postfix: Required end of the file name ('' accepts any).

    Returns:
        list[str]: Paths of the matching files, in ``os.walk`` order.
    """
    # The accumulator is deliberately not called ``list`` so the builtin
    # stays usable inside the function.
    matches = []
    for path, _dirs, files in os.walk(Path(dir)):
        for f in files:
            if pattern in f and f.startswith(prefix) and f.endswith(postfix):
                matches.append(os.path.join(path, f))
    return matches
# Clears open plots
def clearplots():
    """Clear the current matplotlib figure by calling ``plt.clf()``."""
    plt.clf()
# Root of the Dropbox folder; every other location is derived from it.
DropboxDir = Path("/home/roelof/Dropbox (NWU)")
# Main data directory: this is where the raw campaign data files are stored.
DataDir = DropboxDir.joinpath("CRG_Projects/01_Research_Campaigns/2021_Low-CostSensorSouthAfrica/01_FieldCampaigns")
# Directory where the figures/images will be saved.
FigDir = DropboxDir.joinpath("CRG_Projects/01_Research_Campaigns/2021_Low-CostSensorSouthAfrica/03_DataAnalysisOutputs/Figures")
# Directory where the tables/data files will be saved.
TabDir = DropboxDir.joinpath("CRG_Projects/01_Research_Campaigns/2021_Low-CostSensorSouthAfrica/03_DataAnalysisOutputs/Tables")
os.listdir(DropboxDir/'CRG_Projects/01_Research_Campaigns/2021_Low-CostSensorSouthAfrica/03_DataAnalysisOutputs')
['Tables', 'QCQA_DATA', 'Figures']
Action to take place before Level 1 data:
Look at the files
os.listdir(DataDir/'00_Reference_VanderbijilparkSAWS')
['20210628_1334_00_Reference_DK_21062021_02072021.csv', '20210621_0938_00_Reference_DK_13062021_21062021.csv', '20210726_0917_VandebijlparkSAWS_AR_01072021_23072021.csv', '20210615_1154_00_Reference_VW_01012021_15062021.csv', '20210503_1109_00_Reference_GB_01012021_03052021.csv', '20210816_0950_VandebijlparkSAWS_DK_08082021_16082021.csv', '20210913_0850_VandebijlparkSAWS_DK_06092021_13092021.csv', '20210823_0958_VandebijlparkSAWS_DK_16082021_23082021.csv', 'old', '20210510_1047_00_Reference_DK_03052021_10052021.csv', '20210830_0827_VandebijlparkSAWS_DK_23082021_30082021.csv', '20210517_1154_00_Reference_AR_01012021_17052021.csv', 'reference_data.csv', '20210906_0922_VandebijlparkSAWS_DK_30082021_06092021.csv', '20210719_0917_VandebijlparkSAWS_DK_05072021_19072021.csv', '20210701_0940_00_Reference_DK_01072021_05072021.csv', '20210329_1309_00_Reference_GB_01012021_29032021.csv', 'SAAQIS', '20210426_1202_00_Reference_GB_01022021_26042021.csv', '20210920_0948_VandebijlparkSAWS_DK_13092021_20092021.csv', '20210810_0933_VandebijlparkSAWS_DK_01082021_10082021.csv']
Import the raw data
s = '00'
# Files have different number of columns, use appropiate headers
headers33=[ 'Date','Time','00_Wind_Speed (m/s)', '00_Wind_Speed_Status',
'00_Wind_Direction (deg)', '00_Wind_Direction_Status',
'00_Ambient_Temp (degC)', '00_Ambient_Temp_Status',
'00_Relative_Humidity (%)', '00_Relative_Humidity_Status',
'00_Solar_Radiation (W/m2)', '00_Solar_Radiation_Status',
'00_Atmospheric_Pressure (hPa)', '00_Atmospheric_Pressure_Status',
'00_Internal_Temp (degC)', '00_Internal_Temp_Status',
'00_Line_Voltage (V)', '00_Line_Voltage_Status', '00_Rain (mm)',
'00_Rain_Status', '00_SO2 (ppb)', '00_SO2_Status', '00_PM10 (ug/m3)',
'00_PM10_Status', '00_PM2.5 (ug/m3)', '00_PM2.5_Status', '00_NO (ppb)',
'00_NO_Status', '00_NO2 (ppb)', '00_NO2_Status', '00_NOx (ppb)',
'00_NOx_Status','dummy']
headers37=[ 'Date','Time','00_Wind_Speed (m/s)', '00_Wind_Speed_Status',
'00_Wind_Direction (deg)', '00_Wind_Direction_Status',
'00_Ambient_Temp (degC)', '00_Ambient_Temp_Status',
'00_Relative_Humidity (%)', '00_Relative_Humidity_Status',
'00_Solar_Radiation (W/m2)', '00_Solar_Radiation_Status',
'00_Atmospheric_Pressure (hPa)', '00_Atmospheric_Pressure_Status',
'00_Internal_Temp (degC)', '00_Internal_Temp_Status',
'00_Line_Voltage (V)', '00_Line_Voltage_Status', '00_Rain (mm)',
'00_Rain_Status', '00_SO2 (ppb)', '00_SO2_Status', '00_PM10 (ug/m3)',
'00_PM10_Status', '00_PM2.5 (ug/m3)', '00_PM2.5_Status', '00_NO (ppb)',
'00_NO_Status', '00_NO2 (ppb)', '00_NO2_Status', '00_NOx (ppb)',
'00_NOx_Status', '00_O3 (ppb)', '00_O3_Status', '00_CO (ppm)',
'00_CO_Status','dummy']
headers39=[ 'Date','Time','00_Wind_Speed (m/s)', '00_Wind_Speed_Status',
'00_Wind_Direction (deg)', '00_Wind_Direction_Status',
'00_Ambient_Temp (degC)', '00_Ambient_Temp_Status',
'00_Relative_Humidity (%)', '00_Relative_Humidity_Status',
'00_Solar_Radiation (W/m2)', '00_Solar_Radiation_Status',
'00_Atmospheric_Pressure (hPa)', '00_Atmospheric_Pressure_Status',
'00_Internal_Temp (degC)', '00_Internal_Temp_Status',
'00_Line_Voltage (V)', '00_Line_Voltage_Status', '00_Rain (mm)',
'00_Rain_Status', '00_SO2 (ppb)', '00_SO2_Status', '00_PM10 (ug/m3)',
'00_PM10_Status', '00_PM2.5 (ug/m3)', '00_PM2.5_Status', '00_NO (ppb)',
'00_NO_Status', '00_NO2 (ppb)', '00_NO2_Status', '00_NOx (ppb)',
'00_NOx_Status', '00_O3 (ppb)', '00_O3_Status', '00_CO (ppm)',
'00_CO_Status', '00_Horiba_SO2 (ppb)', '00_Horiba_SO2_Status','dummy']
# Read every reference CSV whose name contains 'data' and stack them in df0.
ref_dir = DataDir/'00_Reference_VanderbijilparkSAWS'
# Column layout to apply for each supported column count.
ref_layouts = {33: headers33, 37: headers37, 39: headers39}
files = [f for f in os.listdir(ref_dir) if f.endswith(".csv") and 'data' in f]
ref_frames = []
for f in files:
    fullpath = os.path.join(ref_dir,f)
    print('reading',f)
    dftmp = pd.read_csv(fullpath,skiprows=2,encoding='utf-8', header=None)
    ncols = len(dftmp.columns)
    if ncols not in ref_layouts:
        # Fail with a clear message instead of a KeyError on 'Date' below.
        raise ValueError(
            "{}: unexpected number of columns ({}); expected one of {}".format(
                f, ncols, sorted(ref_layouts)))
    dftmp.columns = ref_layouts[ncols]
    # Build the timestamp index from the day-first date and the time columns.
    dftmp['d']=pd.to_datetime(dftmp['Date']+' '+dftmp['Time'], dayfirst=True)
    dftmp=dftmp.set_index('d')
    dftmp.index.names= ['Date']
    dftmp = dftmp.drop(columns=['Date','Time','dummy'])
    print('finished with', f)
    ref_frames.append(dftmp)
if len(ref_frames) > 1:
    # Most recently read file first, as before.
    df0 = pd.concat(ref_frames[::-1],axis=0)
    # Remove records repeated across files. The timestamp has to take part
    # in the comparison: drop_duplicates() on its own ignores the index and
    # would also drop equal readings taken at different times.
    df0 = df0.reset_index().drop_duplicates().set_index('Date')
elif ref_frames:
    df0 = ref_frames[0]
else:
    df0 = pd.DataFrame()
reading reference_data.csv finished with reference_data.csv
Format values
# Put the records in chronological order.
df0 = df0.sort_index()
# Every value and status column of the widest (39-column) layout, i.e. all
# of its names except 'Date', 'Time' and the trailing 'dummy'.
Var = headers39[2:-1]
if len(df0) > 0:
    # Coerce each column to numbers; unparsable entries become NaN.
    for v in Var:
        ConvertNumeric(df0, v)
    # Attach the South African time zone to the index.
    df0.index = df0.index.tz_localize('Africa/Johannesburg')
# Measurement columns are all columns that are not '*_Status' flags.
ParNames = [column for column in df0.columns if 'Status' not in column]
Correct values according to status
# Blank every measurement whose status flag is not 1.
ParNames = [column for column in df0.columns if 'Status' not in column]
for p in ParNames:
    # '00_SO2 (ppb)' -> '00_SO2_Status'
    svar = p.split()[0] + "_Status"
    if svar not in df0.columns:
        continue
    print("Corrected {}".format(p))
    flagged = df0[svar] != 1
    df0.loc[flagged, p] = nan
Corrected 00_Wind_Speed (m/s) Corrected 00_Wind_Direction (deg) Corrected 00_Ambient_Temp (degC) Corrected 00_Relative_Humidity (%) Corrected 00_Solar_Radiation (W/m2) Corrected 00_Atmospheric_Pressure (hPa) Corrected 00_Internal_Temp (degC) Corrected 00_Line_Voltage (V) Corrected 00_Rain (mm) Corrected 00_SO2 (ppb) Corrected 00_PM10 (ug/m3) Corrected 00_PM2.5 (ug/m3) Corrected 00_NO (ppb) Corrected 00_NO2 (ppb) Corrected 00_NOx (ppb) Corrected 00_O3 (ppb) Corrected 00_CO (ppm) Corrected 00_Horiba_SO2 (ppb)
# Keep only the measurement columns, restricted to 2021-04-26 .. 2021-10-29.
df0 = df0[ParNames].loc['2021-04-26':'2021-10-29']
%matplotlib inline
Look at the raw data
df0.plot(subplots=True, figsize=(15,15))
array([<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
dtype=object)
df0.loc['2021-08-02':'2021-08-16','00_NO (ppb)'].plot()
<AxesSubplot:xlabel='Date'>
# Raise the NO readings of 2021-08-02 to 2021-08-16 by a constant 4.
df0.loc['2021-08-02':'2021-08-16', '00_NO (ppb)'] += 4
df0[['00_NO (ppb)']].plot()
<AxesSubplot:xlabel='Date'>
df0.loc['2021','00_NO2 (ppb)'].plot()
<AxesSubplot:xlabel='Date'>
Adjust negative baseline
# Raise the NO2 readings of 2021-08-03 to 2021-08-30 by a constant 4.
df0.loc['2021-08-03':'2021-08-30', '00_NO2 (ppb)'] += 4
df0.loc['2021','00_NO2 (ppb)'].plot()
<AxesSubplot:xlabel='Date'>
df0.loc['2021','00_NOx (ppb)'].plot()
<AxesSubplot:xlabel='Date'>
Adjust baseline
# Raise the NOx readings of 2021-08-03 to 2021-08-15 by a constant 8.
df0.loc['2021-08-03':'2021-08-15', '00_NOx (ppb)'] += 8
df0.loc['2021-07-07':'2021-07-23','00_SO2 (ppb)'].plot()
<AxesSubplot:xlabel='Date'>
Correct instrument drift
# Raise the SO2 readings of 2021-07-07 to 2021-07-23 by a constant 10.
df0.loc['2021-07-07':'2021-07-23', '00_SO2 (ppb)'] += 10
df0.loc['2021','00_SO2 (ppb)'].plot()
<AxesSubplot:xlabel='Date'>
df0.loc[:, '00_CO (ppm)'].plot()
<AxesSubplot:xlabel='Date'>
Correct baseline drift
# Build df_co, a helper frame holding the CO baseline that is subtracted
# from the reference CO signal further down.
# Copy the timestamp index into a column and derive its day ordinal.
# NOTE(review): 'date' and 'date_ordinal' are left in df0, so they are still
# there when df0 is resampled and exported below - confirm this is intended.
df0['date'] = df0.index
df0['date_ordinal'] = pd.to_datetime(df0['date']).apply(lambda date: date.toordinal())
df_co = pd.DataFrame()
# One row per df0 timestamp between 2021-05-10 and 2021-08-01.
df_co['date_ordinal'] = pd.to_datetime(df0['date'].loc['2021-05-10':'2021-08-01']).apply(lambda date: date.toordinal())
# Start the baseline column as empty strings on every row.
df_co['CO Min'] = df_co.apply(lambda _: '', axis=1)
# Anchor values at the first and the last minute of the window.
# NOTE(review): if either label is absent from the index, .loc assignment
# appends a new row instead of filling an existing one - verify both exist.
df_co.loc['2021-05-10 00:00:00+02:00', 'CO Min'] = 0.277392
df_co.loc['2021-08-01 23:59:00+02:00', 'CO Min'] = 1.191833
# Turn the remaining '' placeholders into NaN so they can be interpolated.
# apply() hands the lambda whole columns (Series), never a str, so the
# strip branch does not run; the work is done by .replace('', np.nan).
df_co = df_co.apply(lambda x: x.str.strip() if isinstance(x, str) else x).replace('', np.nan)
df_co.head()
| date_ordinal | CO Min | |
|---|---|---|
| Date | ||
| 2021-05-10 00:00:00+02:00 | 737920 | 0.277392 |
| 2021-05-10 00:01:00+02:00 | 737920 | NaN |
| 2021-05-10 00:02:00+02:00 | 737920 | NaN |
| 2021-05-10 00:03:00+02:00 | 737920 | NaN |
| 2021-05-10 00:04:00+02:00 | 737920 | NaN |
# Interpolate between the two anchor values to get a baseline for every row.
# NOTE(review): method 'linear' ignores the index and treats the rows as
# equally spaced; if the window has missing minutes, method='time' may be
# the intended behaviour - confirm.
df_co['CO Min'] = df_co['CO Min'].interpolate('linear')
# List of the timestamps covered by the baseline.
co_times = df_co.index.to_list()
# Subtract the baseline from the CO readings at those timestamps.
df0.loc[co_times, '00_CO (ppm)'] = df0.loc[co_times, '00_CO (ppm)'] - df_co['CO Min']
df0['00_CO (ppm)'].plot()
<AxesSubplot:xlabel='Date'>
# Average the reference data onto a regular 1-minute grid.
df0 = df0.resample('1min').mean()
# Export the data as df0.csv in the tables directory (TabDir), 3 decimals.
df0.to_csv(Path(TabDir/ 'df0.csv'), float_format="%.3f")
Function to read files
def readRM(filename, nLines=18, nChars=40):
    """Return the well-formed data rows of a logger file as a single string.

    A line is kept only when all of the following hold:

    * its length, counting the trailing newline, equals ``nChars``;
    * splitting it on ',' gives exactly ``nLines`` fields (despite its
      name, ``nLines`` is a field count, not a line count);
    * its first field starts, after optional non-word characters, with a
      timestamp of the form ``YYYY-MM-DD HH:MM:SS`` where the date
      separators may be '/', '.' or '-'.

    Args:
        filename: Path of the file, read as ISO-8859-1 text.
        nLines: Required number of comma-separated fields per row.
        nChars: Required line length in characters, newline included.

    Returns:
        str: The accepted lines concatenated in file order ('' if none).
    """
    # Raw string keeps the backslashes literal; compiled once for all lines.
    timestamp = re.compile(
        r'\W*(\d{4})[/.-](\d{2})[/.-](\d{2})\s{1}(\d{2}):(\d{2}):(\d{2})')
    kept = []
    with open(filename,encoding='ISO-8859-1') as fp:
        for line in fp:
            if len(line) != nChars:
                continue
            row = line.split(",")
            if len(row) == nLines and timestamp.match(row[0]):
                kept.append(line)
    return "".join(kept)
sorted(os.listdir(DataDir/'01_MetOneES642_U16486'))
['20210503_1100_01_ES642_U16486_GB.csv', '20210512_1055_01_ES642_U16486_GB.csv', '20210517_2300_01_ES642_U16486_GB.csv', '20210528_1100_01_ES642_U16486_GB.csv', '20210531_0953_01_ES642_U16486_GB.csv', '20210624_1153_01_ES642_U16486_GB.csv', '20210629_0844_01_ES642_U16486_GB.csv', '20210708_0837_01_ES642_U16486_GB.csv', '20210712_0845_01_ES642_U16486_GB.csv', '20210719_0649_01_ES642_U16486_GB.csv', '20210810_0649_01_ES642_U16486_GB.csv', '20210816_0649_01_ES642_U16486_GB.csv', '20210823_0649_01_ES642_U16486_GB(1).csv', '20210823_0649_01_ES642_U16486_GB.csv', '20210921_1100_01_ES642_U16486_GB.csv']
Read files
ESNames=["Date",
"01_PM2.5 (ug/m3)",
"01_Flow (l/m)",
"01_Ambient_Temperature (degC)",
"01_Relative_Humidity (%)",
"01_Pressure (hPa)",
"Status2",
"Checksum"]
ESvariables=["01_PM2.5 (ug/m3)","01_Flow (l/m)","01_Ambient_Temperature (degC)","01_Relative_Humidity (%)","01_Pressure (hPa)"]
# Accumulate every 2021 CSV export of ES-642 unit 01 into df1.
df1 = pd.DataFrame()
es01_dir = DataDir/'01_MetOneES642_U16486'
files = os.listdir(es01_dir)
for n, f in enumerate(files, start=1):
    print("{} of {}: ".format(n,len(files)),end="")
    fullpath = os.path.join(es01_dir,f)
    if f.endswith(".csv") and f.startswith("2021"):
        print("Reading ",f)
        # Keep only complete rows: 8 fields, 59 characters, leading timestamp.
        Data = readRM(fullpath,nLines=8,nChars=59)
        dftmp = pd.read_csv(StringIO(Data),index_col=0,parse_dates=[0],names=ESNames)
        # Scale PM2.5 by 1000 (presumably mg/m3 -> ug/m3 - confirm).
        dftmp["01_PM2.5 (ug/m3)"]=dftmp["01_PM2.5 (ug/m3)"]*1000
        # Seed df1 with the first file and merge the later ones into it;
        # on overlapping timestamps the values already in df1 are kept.
        # The emptiness test must look at df1: testing dftmp would replace
        # everything read so far whenever a file holds no valid rows.
        if len(df1) == 0:
            df1 = dftmp
        else:
            df1 = df1.combine_first(dftmp)
    print("Finished with {}".format(f))
if len(df1) > 0:
    # Make sure the ingest was successful: numeric columns, datetime index.
    for v in ESvariables:
        ConvertNumeric(df1,v)
    df1.index=pd.to_datetime(df1.index, errors='coerce')
    # Resample the dataset to 5-minute means.
    df1=df1[ESvariables].resample('5min').mean()
# Keep the data from 2021-04-26 onward.
df1=df1['2021-04-26':]
1 of 15: Reading 20210624_1153_01_ES642_U16486_GB.csv Finished with 20210624_1153_01_ES642_U16486_GB.csv 2 of 15: Reading 20210503_1100_01_ES642_U16486_GB.csv Finished with 20210503_1100_01_ES642_U16486_GB.csv 3 of 15: Reading 20210921_1100_01_ES642_U16486_GB.csv Finished with 20210921_1100_01_ES642_U16486_GB.csv 4 of 15: Reading 20210629_0844_01_ES642_U16486_GB.csv Finished with 20210629_0844_01_ES642_U16486_GB.csv 5 of 15: Reading 20210823_0649_01_ES642_U16486_GB(1).csv Finished with 20210823_0649_01_ES642_U16486_GB(1).csv 6 of 15: Reading 20210719_0649_01_ES642_U16486_GB.csv Finished with 20210719_0649_01_ES642_U16486_GB.csv 7 of 15: Reading 20210512_1055_01_ES642_U16486_GB.csv Finished with 20210512_1055_01_ES642_U16486_GB.csv 8 of 15: Reading 20210816_0649_01_ES642_U16486_GB.csv Finished with 20210816_0649_01_ES642_U16486_GB.csv 9 of 15: Reading 20210528_1100_01_ES642_U16486_GB.csv Finished with 20210528_1100_01_ES642_U16486_GB.csv 10 of 15: Reading 20210708_0837_01_ES642_U16486_GB.csv Finished with 20210708_0837_01_ES642_U16486_GB.csv 11 of 15: Reading 20210517_2300_01_ES642_U16486_GB.csv Finished with 20210517_2300_01_ES642_U16486_GB.csv 12 of 15: Reading 20210531_0953_01_ES642_U16486_GB.csv Finished with 20210531_0953_01_ES642_U16486_GB.csv 13 of 15: Reading 20210823_0649_01_ES642_U16486_GB.csv Finished with 20210823_0649_01_ES642_U16486_GB.csv 14 of 15: Reading 20210712_0845_01_ES642_U16486_GB.csv Finished with 20210712_0845_01_ES642_U16486_GB.csv 15 of 15: Reading 20210810_0649_01_ES642_U16486_GB.csv Finished with 20210810_0649_01_ES642_U16486_GB.csv
# Label the index as UTC, then express it in South African time.
df1.index = df1.index.tz_localize('UTC').tz_convert('Africa/Johannesburg')
df1.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 42624 entries, 2021-04-26 02:00:00+02:00 to 2021-09-21 01:55:00+02:00 Freq: 5T Data columns (total 5 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 01_PM2.5 (ug/m3) 42610 non-null float64 1 01_Flow (l/m) 42610 non-null float64 2 01_Ambient_Temperature (degC) 42610 non-null float64 3 01_Relative_Humidity (%) 42610 non-null float64 4 01_Pressure (hPa) 42610 non-null float64 dtypes: float64(5) memory usage: 2.0 MB
# Discard PM2.5 wherever the sample flow is below 1.8 or above 2.2 l/m.
flow_out_of_range = (df1["01_Flow (l/m)"] < 1.8) | (df1["01_Flow (l/m)"] > 2.2)
df1.loc[flow_out_of_range, '01_PM2.5 (ug/m3)'] = nan
# Blank all variables for the whole of 2021-06-15.
df1.loc['2021-06-15'] = nan
Quick Look
df1.plot(subplots=True)
array([<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>], dtype=object)
# Export df1 with 3 decimals. The options belong to to_csv(): passed to
# Path(...) they never reach the writer, so float_format was not applied.
df1.to_csv(TabDir/'df1.csv', index=True, header=True, float_format="%.03f")
# ES642 02 data directory
df2Dir = Path(DataDir/'02_MetoneES642_U16489')
Import data
ESNames=["Date",
"02_PM2.5 (ug/m3)",
"02_Flow (l/m)",
"02_Ambient_Temperature (degC)",
"02_Relative_Humidity (%)",
"02_Pressure (hPa)",
"Status2",
"Checksum"]
ESvariables=["02_PM2.5 (ug/m3)","02_Flow (l/m)","02_Ambient_Temperature (degC)",
"02_Relative_Humidity (%)","02_Pressure (hPa)"]
# Accumulate every 2021 CSV export of ES-642 unit 02 into df2.
df2 = pd.DataFrame()
files = os.listdir(df2Dir)
for n, f in enumerate(files, start=1):
    print("{} of {}: ".format(n,len(files)),end="")
    fullpath = os.path.join(df2Dir,f)
    if f.endswith(".csv") and f.startswith("2021"):
        print("Reading ",f)
        # Keep only complete rows: 8 fields, 59 characters, leading timestamp.
        Data = readRM(fullpath,nLines=8,nChars=59)
        dftmp = pd.read_csv(StringIO(Data),index_col=0,parse_dates=[0],names=ESNames)
        # Scale PM2.5 by 1000 (presumably mg/m3 -> ug/m3 - confirm).
        dftmp["02_PM2.5 (ug/m3)"]=dftmp["02_PM2.5 (ug/m3)"]*1000
        # Seed df2 with the first file and merge the later ones into it;
        # on overlapping timestamps the values already in df2 are kept.
        # The emptiness test must look at df2: testing dftmp would replace
        # everything read so far whenever a file holds no valid rows.
        if len(df2) == 0:
            df2 = dftmp
        else:
            df2 = df2.combine_first(dftmp)
    print("Finished with {}".format(f))
if len(df2) > 0:
    # Make sure the ingest was successful: numeric columns, datetime index.
    for v in ESvariables:
        ConvertNumeric(df2,v)
    df2.index=pd.to_datetime(df2.index, errors='coerce')
    # Resample the dataset to 5-minute means.
    df2=df2[ESvariables].resample('5min').mean()
# Keep the data from 2021-04-26 onward.
df2=df2['2021-04-26':]
1 of 15: Reading 20210828_0850_02_ES642_U16489_GB.csv Finished with 20210828_0850_02_ES642_U16489_GB.csv 2 of 15: Reading 20210712_0906_02_ES642_U16489_GB.csv Finished with 20210712_0906_02_ES642_U16489_GB.csv 3 of 15: Reading 20210517_2300_02_ES642_U16489_GB.csv Finished with 20210517_2300_02_ES642_U16489_GB.csv 4 of 15: Reading 20210524_1352_02_ES642_U16489_GB.csv Finished with 20210524_1352_02_ES642_U16489_GB.csv 5 of 15: Reading 20210708_0842_02_ES642_U16489_GB.csv Finished with 20210708_0842_02_ES642_U16489_GB.csv 6 of 15: Reading 20210512_1047_02_ES642_U16489_GB.csv Finished with 20210512_1047_02_ES642_U16489_GB.csv 7 of 15: Finished with 20210719_0850_02_ES642_U16489_GB 8 of 15: Reading 20210503_1100_02_ES642_U16489_GB.csv Finished with 20210503_1100_02_ES642_U16489_GB.csv 9 of 15: Reading 20210624_1202_02_ES642_U16489_GB.csv Finished with 20210624_1202_02_ES642_U16489_GB.csv 10 of 15: Reading 20210528_0958_02_ES642_U16489_GB.csv Finished with 20210528_0958_02_ES642_U16489_GB.csv 11 of 15: Reading 20210810_0850_02_ES642_U16489_GB.csv Finished with 20210810_0850_02_ES642_U16489_GB.csv 12 of 15: Reading 20210921_1100_02_ES642_U16489_GB.csv Finished with 20210921_1100_02_ES642_U16489_GB.csv 13 of 15: Reading 20210816_0850_02_ES642_U16489_GB.csv Finished with 20210816_0850_02_ES642_U16489_GB.csv 14 of 15: Reading 20211019_1100_02_ES642_U16489_GB.csv Finished with 20211019_1100_02_ES642_U16489_GB.csv 15 of 15: Reading 20210823_0850_02_ES642_U16489_GB.csv Finished with 20210823_0850_02_ES642_U16489_GB.csv
# Read the index as UTC (utc=True), then express it in South African time.
# Earlier variant (disabled): df2.index=df2.index.tz_localize('UTC')
utc_index = pd.to_datetime(df2.index, utc=True)
df2.index = utc_index.tz_convert('Africa/Johannesburg')
# Discard PM2.5 wherever the sample flow is below 1.8 or above 2.2 l/m.
flow_out_of_range = (df2["02_Flow (l/m)"] < 1.8) | (df2["02_Flow (l/m)"] > 2.2)
df2.loc[flow_out_of_range, '02_PM2.5 (ug/m3)'] = nan
# Blank all variables on these days.
# NOTE(review): 2021-04-19 is earlier than the 2021-04-26 cut applied to df2
# above - confirm this date is intended.
df2.loc['2021-04-19'] = nan
df2.loc['2021-07-07'] = nan
df2.loc['2021-09-21'] = nan
Quick Look
df2.plot(subplots=True)
array([<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>], dtype=object)
# Export df2 with 3 decimals. index/header are to_csv() options; passing
# them to Path(...) is deprecated since Python 3.12.
df2.to_csv(TabDir/'df2.csv', index=True, header=True, float_format="%.03f")
Timezone : local
Ari03 = Path(DataDir/'03_QuantAQARISense_SN000-57')
Import Data
AriVariables = ['id',
'timestamp',
'timestamp_local',
'sn',
'raw_table_id',
'temp_box',
'03_Ambient_Temperature (degC)',
'03_Relative_Humidity (%)',
'03_Pressure (hPa)',
'03_Noise (dB)',
'03_Solar_Radiation (W/m2)',
'03_Wind_Direction (deg)',
'03_Wind_Speed (m/s)',
'03_CO2 (ppm)',
'03_VOC (ppb)',
'03_CO (ppm)',
'03_NO (ppb)',
'03_NO2 (ppb)',
'03_O3 (ppb)',
'03_PM1 (ug/m3)',
'03_PM2.5 (ug/m3)',
'03_PM10 (ug/m3)',
'03_lat',
'03_lon',
'03_device_state',
'03_co2_model_id',
'03_voc_model_id',
'03_co_model_id',
'03_no_model_id',
'03_no2_model_id',
'03_o3_model_id',
'03_pm1_model_id',
'03_pm25_model_id',
'03_pm10_model_id']
# Accumulate every 2021 CSV export of ARISense SN000-57 into df03.
files = os.listdir(Ari03)
df03 = pd.DataFrame()
for n, f in enumerate(files, start=1):
    print("{} of {}: ".format(n,len(files)),end="")
    fullpath = os.path.join(Ari03,f)
    if f.endswith(".csv") and f.startswith("2021"):
        print("Reading ",f)
        # The file's own header row is replaced by AriVariables; the third
        # column ('timestamp_local') is parsed as dates and becomes the index.
        dftmp = pd.read_csv(fullpath,header=0,names=AriVariables,na_values=["NAN"],index_col=2,parse_dates=[2])
        # Seed df03 with the first file and merge the later ones into it;
        # on overlapping timestamps the values already in df03 are kept.
        # The emptiness test must look at df03: testing dftmp would replace
        # everything read so far whenever a file holds no rows.
        if len(df03) == 0:
            df03 = dftmp
        else:
            df03 = df03.combine_first(dftmp)
    print("Finished with {}".format(f))
if len(df03) > 0:
    # 'timestamp_local' was consumed as the index, so it is no longer a
    # column; work only with the names that are still columns to avoid a
    # KeyError.
    AriColumns = [v for v in AriVariables if v in df03.columns]
    # Make sure the ingest was successful: numeric columns, datetime index.
    for v in AriColumns:
        ConvertNumeric(df03,v)
    df03.index=pd.to_datetime(df03.index, errors='coerce')
    # Resample the dataset to 5-minute means.
    df03=df03[AriColumns].resample('5min').mean()
1 of 21: Reading 20211029_0847_03_Arisense_SN000-57_DK.csv Finished with 20211029_0847_03_Arisense_SN000-57_DK.csv 2 of 21: Reading 20210705_1115_03_Arisense_SN000-57_VW.csv Finished with 20210705_1115_03_Arisense_SN000-57_VW.csv 3 of 21: Reading 20210803_1030_03_Arisense_SN000-57_VW.csv Finished with 20210803_1030_03_Arisense_SN000-57_VW.csv 4 of 21: Reading 20210908_1000_03_Arisense_SN000-57_VW.csv Finished with 20210908_1000_03_Arisense_SN000-57_VW.csv 5 of 21: Reading 20210830_1600_03_Arisense_SN000-57_VW.csv Finished with 20210830_1600_03_Arisense_SN000-57_VW.csv 6 of 21: Reading 20210823_1012_03_Arisense_SN000-57_VW.csv Finished with 20210823_1012_03_Arisense_SN000-57_VW.csv 7 of 21: Reading 20210409_1023_03_ARISense_SN000-57_VW.csv Finished with 20210409_1023_03_ARISense_SN000-57_VW.csv 8 of 21: Reading 20211025_1407_03_Arisense_SN000-57_DK.csv Finished with 20211025_1407_03_Arisense_SN000-57_DK.csv 9 of 21: Reading 20210524_1120_03_Arisense_SN000-57_VW.csv Finished with 20210524_1120_03_Arisense_SN000-57_VW.csv 10 of 21: Reading 20210728_1013_03_Arisense_SN000-57_VW.csv Finished with 20210728_1013_03_Arisense_SN000-57_VW.csv 11 of 21: Reading 20211005_1243_03_Arisense_SN000-57_DK.csv Finished with 20211005_1243_03_Arisense_SN000-57_DK.csv 12 of 21: Reading 20210629_0900_03_Arisense_SN000-57_VW.csv Finished with 20210629_0900_03_Arisense_SN000-57_VW.csv 13 of 21: Reading 20210712_1430_03_Arisense_SN000-57_VW.csv Finished with 20210712_1430_03_Arisense_SN000-57_VW.csv 14 of 21: Reading 20210518_0930_03_Arisense_SN000-57_VW.csv Finished with 20210518_0930_03_Arisense_SN000-57_VW.csv 15 of 21: Reading 20210621_1238_03_Arisense_SN000-57_VW.csv Finished with 20210621_1238_03_Arisense_SN000-57_VW.csv 16 of 21: Reading 20211011_0630_03_Arisense_SN000-57_DK.csv Finished with 20211011_0630_03_Arisense_SN000-57_DK.csv 17 of 21: Reading 20210920_1208_03_Arisense_SN000-57_DK.csv Finished with 20210920_1208_03_Arisense_SN000-57_DK.csv 18 of 21: Reading 
20210511_0930_03_ARISense_SN000-57_VW.csv Finished with 20210511_0930_03_ARISense_SN000-57_VW.csv 19 of 21: Reading 20210816_1030_03_Arisense_SN000-57_VW.csv Finished with 20210816_1030_03_Arisense_SN000-57_VW.csv 20 of 21: Reading 20210503_2145_03_ARISense_SN000-57_VW.csv Finished with 20210503_2145_03_ARISense_SN000-57_VW.csv 21 of 21: Finished with 20211029_0847_03_Arisense_SN000-57_DK.xlsx
# Express the index in South African time. tz_convert needs a tz-aware index.
# NOTE(review): assumes 'timestamp_local' was parsed with a UTC offset - confirm.
df03.index = df03.index.tz_convert('Africa/Johannesburg')
Define columns
ParNames=['03_Ambient_Temperature (degC)',
'03_Relative_Humidity (%)',
'03_Pressure (hPa)',
'03_Noise (dB)',
'03_Solar_Radiation (W/m2)',
'03_Wind_Direction (deg)',
'03_Wind_Speed (m/s)',
'03_CO (ppm)',
'03_NO (ppb)',
'03_NO2 (ppb)',
'03_O3 (ppb)',
'03_PM1 (ug/m3)',
'03_PM2.5 (ug/m3)',
'03_PM10 (ug/m3)']
# Keep only the measurement columns listed in ParNames.
df03 = df03[ParNames]
# Rescale CO by 1/1000 and pressure by 1/100 (presumably ppb -> ppm and
# Pa -> hPa, to match the units in the column names - confirm).
co_scaled = df03['03_CO (ppm)'] / 1000
df03['03_CO (ppm)'] = co_scaled
pressure_scaled = df03['03_Pressure (hPa)'] / 100
df03['03_Pressure (hPa)'] = pressure_scaled
# Keep the data from 2021-04-26 onward.
df03 = df03['2021-04-26':]
# Discard CO values outside [0.001, 130000].
# NOTE(review): the upper bound is applied after the division by 1000 above;
# check that 130000 is meant in the rescaled units.
co_out_of_range = (df03['03_CO (ppm)'] < 0.001) | (df03['03_CO (ppm)'] > 130000)
df03.loc[co_out_of_range, '03_CO (ppm)'] = nan
# Blank all variables during these periods.
for start, stop in (('2021-07-04', '2021-07-05'),
                    ('2021-10-01', '2021-10-02'),
                    ('2021-07-07 06:00:00', '2021-07-07 08:00:00')):
    df03.loc[start:stop] = nan
Quick Look
df03.plot(subplots=True)
array([<AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>,
<AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>,
<AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>,
<AxesSubplot:>, <AxesSubplot:>], dtype=object)
# Export df03 (file df3.csv) with 3 decimals. index/header are to_csv()
# options; passing them to Path(...) is deprecated since Python 3.12.
df03.to_csv(TabDir/'df3.csv', index=True, header=True, float_format="%.03f")
Timezone: local
Ari04 = Path(DataDir/'04_QuantAQARISense_SN000-59')
AriVariables = ['id',
'timestamp',
'timestamp_local',
'sn',
'raw_table_id',
'temp_box',
'04_Ambient_Temperature (degC)',
'04_Relative_Humidity (%)',
'04_Pressure (hPa)',
'04_Noise (dB)',
'04_Solar_Radiation (W/m2)',
'04_Wind_Direction (deg)',
'04_Wind_Speed (m/s)',
'04_CO2 (ppm)',
'04_VOC (ppb)',
'04_CO (ppm)',
'04_NO (ppb)',
'04_NO2 (ppb)',
'04_O3 (ppb)',
'04_PM1 (ug/m3)',
'04_PM2.5 (ug/m3)',
'04_PM10 (ug/m3)',
'04_lat',
'04_lon',
'04_device_state',
'04_co2_model_id',
'04_voc_model_id',
'04_co_model_id',
'04_no_model_id',
'04_no2_model_id',
'04_o3_model_id',
'04_pm1_model_id',
'04_pm25_model_id',
'04_pm10_model_id']
# Accumulate every 2021 CSV export of ARISense SN000-59 into df04.
files = os.listdir(Ari04)
df04 = pd.DataFrame()
for n, f in enumerate(files, start=1):
    print("{} of {}: ".format(n,len(files)),end="")
    fullpath = os.path.join(Ari04,f)
    if f.endswith(".csv") and f.startswith("2021"):
        print("Reading ",f)
        # The file's own header row is replaced by AriVariables; the third
        # column ('timestamp_local') is parsed as dates and becomes the index.
        dftmp = pd.read_csv(fullpath,header=0,names=AriVariables,na_values=["NAN"],index_col=2,parse_dates=[2])
        # Seed df04 with the first file and merge the later ones into it;
        # on overlapping timestamps the values already in df04 are kept.
        # The emptiness test must look at df04: testing dftmp would replace
        # everything read so far whenever a file holds no rows.
        if len(df04) == 0:
            df04 = dftmp
        else:
            df04 = df04.combine_first(dftmp)
    print("Finished with {}".format(f))
if len(df04) > 0:
    # 'timestamp_local' was consumed as the index, so it is no longer a
    # column; work only with the names that are still columns to avoid a
    # KeyError.
    AriColumns = [v for v in AriVariables if v in df04.columns]
    # Make sure the ingest was successful: numeric columns, datetime index.
    for v in AriColumns:
        ConvertNumeric(df04,v)
    df04.index=pd.to_datetime(df04.index, errors='coerce')
    # Resample the dataset to 5-minute means.
    df04=df04[AriColumns].resample('5min').mean()
1 of 11: Reading 20210803_1030_04_Arisense_SN000-59_VW.csv Finished with 20210803_1030_04_Arisense_SN000-59_VW.csv 2 of 11: Reading 20210409_1023_04_ARISense_SN000-59_VW.csv Finished with 20210409_1023_04_ARISense_SN000-59_VW.csv 3 of 11: Reading 20210518_0930_04_Arisense_SN000-59_VW.csv Finished with 20210518_0930_04_Arisense_SN000-59_VW.csv 4 of 11: Reading 20210712_1430_04_Arisense_SN000-59_VW.csv Finished with 20210712_1430_04_Arisense_SN000-59_VW.csv 5 of 11: Reading 20210524_1118_04_Arisense_SN000-59_VW.csv Finished with 20210524_1118_04_Arisense_SN000-59_VW.csv 6 of 11: Reading 20210511_0930_04_ARISense_SN000-59_VW.csv Finished with 20210511_0930_04_ARISense_SN000-59_VW.csv 7 of 11: Reading 20210621_1238_04_Arisense_SN000-59_VW.csv Finished with 20210621_1238_04_Arisense_SN000-59_VW.csv 8 of 11: Reading 20210503_2145_04_ARISense_SN000-59_VW.csv Finished with 20210503_2145_04_ARISense_SN000-59_VW.csv 9 of 11: Reading 20210629_0900_04_Arisense_SN000-59_VW.csv Finished with 20210629_0900_04_Arisense_SN000-59_VW.csv 10 of 11: Reading 20210728_1018_04_Arisense_SN000-59_VW.csv Finished with 20210728_1018_04_Arisense_SN000-59_VW.csv 11 of 11: Reading 20210705_1115_04_Arisense_SN000-59_VW.csv Finished with 20210705_1115_04_Arisense_SN000-59_VW.csv
# Express the index in South African time. tz_convert needs a tz-aware index.
# NOTE(review): assumes 'timestamp_local' was parsed with a UTC offset - confirm.
df04.index = df04.index.tz_convert('Africa/Johannesburg')
Define Columns
ParNames=['04_Ambient_Temperature (degC)',
'04_Relative_Humidity (%)',
'04_Pressure (hPa)',
'04_Noise (dB)',
'04_Solar_Radiation (W/m2)',
'04_Wind_Direction (deg)',
'04_Wind_Speed (m/s)',
'04_CO (ppm)',
'04_NO (ppb)',
'04_NO2 (ppb)',
'04_O3 (ppb)',
'04_PM1 (ug/m3)',
'04_PM2.5 (ug/m3)',
'04_PM10 (ug/m3)']
# Keep only the measurement columns listed in ParNames.
df04 = df04[ParNames]
# Rescale CO by 1/1000 and pressure by 1/100 (presumably ppb -> ppm and
# Pa -> hPa, to match the units in the column names - confirm).
co_scaled = df04['04_CO (ppm)'] / 1000
df04['04_CO (ppm)'] = co_scaled
pressure_scaled = df04['04_Pressure (hPa)'] / 100
df04['04_Pressure (hPa)'] = pressure_scaled
# Keep the data from 2021-04-26 onward.
df04 = df04['2021-04-26':]
df04.plot(subplots=True)
array([<AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>,
<AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>,
<AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>,
<AxesSubplot:>, <AxesSubplot:>], dtype=object)
# Discard CO values outside [0.001, 130000].
# NOTE(review): the upper bound is applied to the already rescaled CO
# column; check that 130000 is meant in those units.
co_out_of_range = (df04['04_CO (ppm)'] < 0.001) | (df04['04_CO (ppm)'] > 130000)
df04.loc[co_out_of_range, '04_CO (ppm)'] = nan
# Blank all variables on 2021-06-09 between 15:00 and 16:30.
df04.loc['2021-06-09 15:00:00':'2021-06-09 16:30:00'] = nan
df04[['04_NO2 (ppb)']].plot()
<AxesSubplot:>
# Export df04 (file df4.csv) with 3 decimals. index/header are to_csv()
# options; passing them to Path(...) is deprecated since Python 3.12.
df04.to_csv(TabDir/'df4.csv', index=True, header=True, float_format="%.03f")
os.listdir(DataDir)
['06_AeroqualS500PM_5002-2D82-001', '13_SimplicityV2_IMTAQS0003', '16_RAMP_173', '07_AeroqualS500CO_ECM-1906191-003', '28_RAMP_177', '02_MetoneES642_U16489', '09_SimplicityV1_CCSENV020', '05_Vaisala_S1830003', '00_Reference_VanderbijilparkSAWS', 'ReferenceAQMS_Calibrations', '20210408_1037_09_SimplicityV1_CCSENV020_DK.csv', '08_OizomePolludrone_EA01P0001', '18_SensorsAfricaPlantower_108', '17_ThermoGM5000_CM21035290', '10_SimplicityV1_CCSENV011', '22_Atmos_84CCA8B167D2', '20210923_0000_00_Reference_GB.csv', '12_SimplicityV2_IMTAQS0002', '01_MetOneES642_U16486', '15_ECOMSMART_ECS-C-XXFNON-20149', '04_QuantAQARISense_SN000-59', '23_Atmos_98F4ABDCA328', '11_SimplicityV2_IMTAQS0001', '14_SimplicityV2_IMTAQS0004', '21_EarthsenseZephyr_729-SA', '22_Atmos_Com.csv', '26_EarthsenseZephyr_Z00533', '24_Dylos_', '03_QuantAQARISense_SN000-57', '20_EarthsenseZephyr_642-SA', '27_OizomePolludrone_PM01P007', '08_Polludrone.csv']
Files to read
# Sensor 05 export files to merge.
DataFiles = [
    '20210524_1047_05_Vaisala_108_AR1.csv',
    '20210517_1047_05_Vaisala_108_AR1.csv',
    '20210503_1047_05_Vaisala_108_AR1.csv',
    '20210426_1047_05_Vaisala_108_AR1.csv',
    '20210615_1235_05_Vaisala_S1830003_DK1.csv',
    '20210823_1006_05_Vaisala_S1830003_DK1.csv',
    '20210629_0945_05_Vaisala_S1830003_AR1.csv',
    '20210621_1028_05_Vaisala_S1830003_DK1.csv',
    '20210816_1113_05_Vaisala_S1830003_DK1.csv',
    '20210906_1027_05_Vaisala_S1830003_DK1.csv',
    '20210830_0916_05_Vaisala_S1830003_DK1.csv',
    '20210614_1233_05_Vaisala_S1830003_DK1.csv',
    '20210705_1057_05_Vaisala_S1830003_AR1.csv',
    '20210726_1226_05_Vaisala_S1830003_AR1.csv',
    '20210913_0918_05_Vaisala_S1830003_DK1.csv',
    '20210719_0940_05_Vaisala_S1830003_DK1.csv',
    '20210712_1141_05_Vaisala_S1830003_DK1.csv',
    '20210529_1226_05_Vaisala_S1830003_DK1.csv',
    '20210920_1016_05_Vaisala_S1830003_DK1.csv',
    '20210719_1226_05_Vaisala_S1830003_AR1.csv',
    '20210808_1047_05_Vaisala_S1830003_AR1.csv',
    '20210628_1047_05_Vaisala_S1830003_AR1.csv',
    '20210528_1047_05_Vaisala_S1830003_AR1.csv',
    '20210510_1047_05_Vaisala_108_AR1.csv',
]
# Column labels passed to read_csv for every file.
Vars = [
    "05_NO2 (ppb)",
    "05_SO2 (ppb)",
    "05_CO (ppm)",
    "05_O3 (ppb)",
    "05_PM2.5 (ug/m3)",
    "05_PM10 (ug/m3)",
    "05_Ambient_Temperature (degC)",
    "05_Relative_Humidity (%)",
    "05_Pressure (hPa)",
    "05_Validity",
]
# Merge the files one by one; combine_first keeps the value already held in
# df5 and only fills cells that are missing there.
df5 = pd.DataFrame()
for File in DataFiles:
    print(File)
    if not File.endswith('.csv'):
        continue
    dftmp = pd.read_csv(
        Path(DataDir / '05_Vaisala_S1830003' / File),
        sep=';',
        skiprows=5,
        parse_dates=[0],
        index_col=0,
        names=Vars,
    )
    df5 = dftmp if len(df5) == 0 else df5.combine_first(dftmp)
df5.index.name = 'Date'
20210524_1047_05_Vaisala_108_AR1.csv 20210517_1047_05_Vaisala_108_AR1.csv 20210503_1047_05_Vaisala_108_AR1.csv 20210426_1047_05_Vaisala_108_AR1.csv 20210615_1235_05_Vaisala_S1830003_DK1.csv 20210823_1006_05_Vaisala_S1830003_DK1.csv 20210629_0945_05_Vaisala_S1830003_AR1.csv 20210621_1028_05_Vaisala_S1830003_DK1.csv 20210816_1113_05_Vaisala_S1830003_DK1.csv 20210906_1027_05_Vaisala_S1830003_DK1.csv 20210830_0916_05_Vaisala_S1830003_DK1.csv 20210614_1233_05_Vaisala_S1830003_DK1.csv 20210705_1057_05_Vaisala_S1830003_AR1.csv 20210726_1226_05_Vaisala_S1830003_AR1.csv 20210913_0918_05_Vaisala_S1830003_DK1.csv 20210719_0940_05_Vaisala_S1830003_DK1.csv 20210712_1141_05_Vaisala_S1830003_DK1.csv 20210529_1226_05_Vaisala_S1830003_DK1.csv 20210920_1016_05_Vaisala_S1830003_DK1.csv 20210719_1226_05_Vaisala_S1830003_AR1.csv 20210808_1047_05_Vaisala_S1830003_AR1.csv 20210628_1047_05_Vaisala_S1830003_AR1.csv 20210528_1047_05_Vaisala_S1830003_AR1.csv 20210510_1047_05_Vaisala_108_AR1.csv
# Measurement columns that are blanked wherever the validity flag is not 1.
df5_value_cols = [
    "05_NO2 (ppb)",
    "05_SO2 (ppb)",
    "05_CO (ppm)",
    "05_O3 (ppb)",
    "05_PM2.5 (ug/m3)",
    "05_PM10 (ug/m3)",
    "05_Ambient_Temperature (degC)",
    "05_Relative_Humidity (%)",
    "05_Pressure (hPa)",
]
df5.loc[df5["05_Validity"] != 1, df5_value_cols] = nan
# Attach the local time zone to the naive timestamps.
df5.index = df5.index.tz_localize('Africa/Johannesburg')
# Drop rows whose timestamp repeats, keeping the first occurrence.
df5 = df5[~df5.index.duplicated(keep='first')]
# Average onto a 5-minute grid.
# NOTE(review): the quick-view output shows nine panels for ten named columns,
# so one column is presumably non-numeric; pandas >= 2.0 raises on such
# columns in mean() instead of dropping them - confirm.
df5 = df5.resample('5min').mean()
Quick View
# Quick graph view to see if data is available (one panel per column).
df5.plot(subplots=True)
array([<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>], dtype=object)
# Saving the Level 0 raw data file to the specified directory.
# `index` and `header` are to_csv options; Path() only takes path segments
# (keyword arguments are deprecated there).
df5.to_csv(Path(TabDir / 'df5.csv'), index=True, header=True)
Import Data
# Column labels for the sensor-06 files; the first file column is the index.
VarNames = [
    '06_Monitor ID',
    '06_Location ID',
    '06_PM10 (ug/m3)',
    '06_PM2.5 (ug/m3)',
]
# Read every file in the sensor folder and merge; combine_first keeps values
# already present and fills only missing cells.
df6 = pd.DataFrame()
for f in os.listdir(Path(DataDir / '06_AeroqualS500PM_5002-2D82-001')):
    df6_part = pd.read_csv(
        Path(DataDir / '06_AeroqualS500PM_5002-2D82-001' / f),
        parse_dates=[0],
        skiprows=1,
        index_col=0,
        names=VarNames,
    )
    df6 = df6_part if len(df6) == 0 else df6.combine_first(df6_part)
# Scale both PM columns by 1000.
# NOTE(review): presumably mg/m3 -> ug/m3 - confirm against the instrument export.
for pm_col in ('06_PM10 (ug/m3)', '06_PM2.5 (ug/m3)'):
    df6[pm_col] = df6[pm_col] * 1000
# Blank PM readings from 2021-10-29 00:00 onwards (timestamps still naive here).
after_cutoff = df6.index >= pd.to_datetime('2021-10-29 00:00')
df6.loc[after_cutoff, '06_PM10 (ug/m3)'] = nan
df6.loc[after_cutoff, '06_PM2.5 (ug/m3)'] = nan
# Keep rows from 2021-04-26 onwards, then move from UTC to local time.
df6 = df6['2021-04-26':]
df6.index = df6.index.tz_localize('UTC').tz_convert('Africa/Johannesburg')
# The id columns are not measurements.
df6.drop(['06_Monitor ID', '06_Location ID'], axis=1, inplace=True)
df6.plot(subplots=True)
array([<AxesSubplot:>, <AxesSubplot:>], dtype=object)
# Average onto a 10-minute grid, then preview the first rows.
df6=df6.resample('10min').mean()
df6.head()
| 06_PM10 (ug/m3) | 06_PM2.5 (ug/m3) | |
|---|---|---|
| 2021-04-26 02:00:00+02:00 | 20.0 | 15.0 |
| 2021-04-26 02:10:00+02:00 | 48.0 | 28.0 |
| 2021-04-26 02:20:00+02:00 | 22.0 | 16.0 |
| 2021-04-26 02:30:00+02:00 | 28.0 | 22.0 |
| 2021-04-26 02:40:00+02:00 | 45.0 | 27.0 |
# Quick view of the sensor-06 frame, one panel per column.
df6.plot(subplots=True)
array([<AxesSubplot:>, <AxesSubplot:>], dtype=object)
# Write the sensor-06 table with three decimals.
df6.to_csv(Path(TabDir, 'df6.csv'), float_format="%.3f")
Define variable names
# Column labels for a sensor-07 file: a date-time column, monitor id,
# location id and a single CO (ppm) column.
# NOTE(review): the sensor-07 import passes VarNames, not this list, to
# read_csv, so `headings` appears to be unused here - confirm before removing.
headings=["datetime",
"07_Monitor_ID",
"07_Location_ID",
"07_CO (ppm)"]
Import data
# Column labels for the sensor-07 files; the first file column is the index.
VarNames = ['07_Monitor_ID', '07_Location_ID', '07_CO (ppm)']
# Read every file in the sensor folder and merge; combine_first keeps values
# already present and fills only missing cells.
df7 = pd.DataFrame()
for f in os.listdir(Path(DataDir / '07_AeroqualS500CO_ECM-1906191-003')):
    df7_part = pd.read_csv(
        Path(DataDir / '07_AeroqualS500CO_ECM-1906191-003' / f),
        parse_dates=[0],
        skiprows=1,
        index_col=0,
        names=VarNames,
    )
    df7 = df7_part if len(df7) == 0 else df7.combine_first(df7_part)
# Limit to 2021-04-26 .. 2021-10-29, then move from UTC to local time.
df7 = df7.loc['2021-04-26':'2021-10-29']
df7.index = df7.index.tz_localize('UTC').tz_convert('Africa/Johannesburg')
# Accepted [lower, upper] range per column; values outside are blanked.
VariableLimits = {'07_CO (ppm)': [0.05, 25]}
for v, (lower, upper) in VariableLimits.items():
    print(v)
    df7.loc[(df7[v] < lower) | (df7[v] > upper), v] = nan
07_CO (ppm)
Remove unused
# Discard the id columns, then average onto a 10-minute grid.
df7 = df7.drop(columns=['07_Monitor_ID', '07_Location_ID'])
df7 = df7.resample('10min').mean()
Quick View
# Quick view of the sensor-07 frame.
df7.plot(subplots=True)
array([<AxesSubplot:>], dtype=object)
# Write the sensor-07 table with three decimals.
df7.to_csv(Path(TabDir, 'df7.csv'), float_format="%.3f")
Import data
# Column labels passed to read_csv for every sensor-08 file.
Vars = [
    "08_HAQI",
    "08_Battery (%)",
    "08_CO2 (ppm)",
    "08_CO (mg/m3)",
    "08_NO2 (ug/m3)",
    "08_O3 (ug/m3)",
    "08_NO (ppb)",
    "08_SO2 (ug/m3)",
    "08_Relative_Humidity (%)",
    "08_Leq (dB)",
    "08_Light (Lux)",
    "08_Lmax (dB)",
    "08_Lmin (dB)",
    "08_PM2.5 (ug/m3)",
    "08_PM10 (ug/m3)",
    "08_Ambient_Temperature (degC)",
    "08_UV (Index)",
]
# Sensor 08 export files to merge.
DataFiles = [
    '20210705_1020_08_Polludrone_EA01P0001_DK.csv',
    '20211005_1256_08_Polludrone_EA01P0001_DK.csv',
    '20210823_1032_08_Polludrone_EA01P0001_DK.csv',
    '20210830_0923_08_Polludrone_EA01P0001_DK.csv',
    '20210810_1001_08_Polludrone_EA01P0001_DK.csv',
    '20210521_1013_08_Polludrone_EA01P0001_AR.csv',
    '20210526_1013_08_Polludrone_EA01P0001_AR.csv',
    '20210615_1239_08_Polludrone_EA01P0001_DK.csv',
    '20210920_1028_08_Polludrone_EA01P0001_DK.csv',
    '20210621_1028_08_Polludrone_EA01P0001_DK.csv',
    '20210906_1039_08_Polludrone_EA01P0001_DK.csv',
    '20210629_1016_08_Polludrone_EA01P0001_AR.csv',
    '20210510_1016_08_Polludrone_EA01P0001_AR.csv',
    '20210426_1013_08_Polludrone_EA01P0001_AR.csv',
    '20210524_1013_08_Polludrone_EA01P0001_AR.csv',
    '20211025_1425_08_Polludrone_EA01P0001_DK.csv',
    '20210725_1013_08_Polludrone_EA01P0001_AR.csv',
    '20211029_0850_08_Polludrone_EA01P0001_DK.csv',
    '20210712_1215_08_Polludrone_EA01P0001_DK.csv',
    '20210510_1013_08_Polludrone_EA01P0001_AR.csv',
    '20210719_1009_08_Polludrone_EA01P0001_DK.csv',
    '20210816_1120_08_Polludrone_EA01P0001_DK.csv',
    '20210426_1016_08_Polludrone_EA01P0001_AR.csv',
    '20210913_0925_08_Polludrone_EA01P0001_DK.csv',
    '20210517_1016_08_Polludrone_EA01P0001_AR.csv',
]
# Merge the files one by one; combine_first keeps the value already held in
# df8 and only fills cells that are missing there.
df8 = pd.DataFrame()
for File in DataFiles:
    print(File)
    if not File.endswith('.csv'):
        continue
    dftmp = pd.read_csv(
        Path(DataDir / '08_OizomePolludrone_EA01P0001' / File),
        skiprows=1,
        parse_dates=[0],
        dayfirst=True,
        index_col=0,
        names=Vars,
    )
    df8 = dftmp if len(df8) == 0 else df8.combine_first(dftmp)
df8.index.name = 'Date'
# Keep rows from 2021-04-26 onwards.
df8 = df8['2021-04-26':]
20210705_1020_08_Polludrone_EA01P0001_DK.csv 20211005_1256_08_Polludrone_EA01P0001_DK.csv 20210823_1032_08_Polludrone_EA01P0001_DK.csv 20210830_0923_08_Polludrone_EA01P0001_DK.csv 20210810_1001_08_Polludrone_EA01P0001_DK.csv 20210521_1013_08_Polludrone_EA01P0001_AR.csv 20210526_1013_08_Polludrone_EA01P0001_AR.csv 20210615_1239_08_Polludrone_EA01P0001_DK.csv 20210920_1028_08_Polludrone_EA01P0001_DK.csv 20210621_1028_08_Polludrone_EA01P0001_DK.csv 20210906_1039_08_Polludrone_EA01P0001_DK.csv 20210629_1016_08_Polludrone_EA01P0001_AR.csv 20210510_1016_08_Polludrone_EA01P0001_AR.csv 20210426_1013_08_Polludrone_EA01P0001_AR.csv 20210524_1013_08_Polludrone_EA01P0001_AR.csv 20211025_1425_08_Polludrone_EA01P0001_DK.csv 20210725_1013_08_Polludrone_EA01P0001_AR.csv 20211029_0850_08_Polludrone_EA01P0001_DK.csv 20210712_1215_08_Polludrone_EA01P0001_DK.csv 20210510_1013_08_Polludrone_EA01P0001_AR.csv 20210719_1009_08_Polludrone_EA01P0001_DK.csv 20210816_1120_08_Polludrone_EA01P0001_DK.csv 20210426_1016_08_Polludrone_EA01P0001_AR.csv 20210913_0925_08_Polludrone_EA01P0001_DK.csv 20210517_1016_08_Polludrone_EA01P0001_AR.csv
# Attach the local time zone to the naive timestamps and order by time.
df8.index = df8.index.tz_localize('Africa/Johannesburg')
df8 = df8.sort_index()
# Add converted columns: each new column is its source column divided by a
# fixed factor (ug/m3 -> ppb for NO2, SO2 and O3; mg/m3 -> ppm for CO).
df8_conversions = {
    '08_NO2 (ppb)': ('08_NO2 (ug/m3)', 1.886),
    '08_SO2 (ppb)': ('08_SO2 (ug/m3)', 2.6178),
    '08_O3 (ppb)': ('08_O3 (ug/m3)', 1.967),
    '08_CO (ppm)': ('08_CO (mg/m3)', 1.153),
}
for converted, (source, factor) in df8_conversions.items():
    df8[converted] = df8[source] / factor
# Blank readings below 0.001 or above the column's upper limit.
# (A -20..85 limit on '08_Ambient_Temperature (degC)' is intentionally not applied.)
df8_upper_limits = {
    '08_PM10 (ug/m3)': 1000,
    '08_PM2.5 (ug/m3)': 1000,
    '08_NO2 (ppb)': 2000,
    '08_SO2 (ppb)': 2000,
    '08_O3 (ppb)': 2000,
    '08_CO (ppm)': 1000,
    '08_Relative_Humidity (%)': 100,
}
for limited, upper in df8_upper_limits.items():
    df8.loc[(df8[limited] < 0.001) | (df8[limited] > upper), limited] = nan
# Columns kept in the output, in order.
var8 = [
    "08_CO2 (ppm)",
    "08_CO (ppm)",
    "08_NO2 (ppb)",
    "08_O3 (ppb)",
    "08_NO (ppb)",
    "08_SO2 (ppb)",
    "08_Relative_Humidity (%)",
    "08_PM2.5 (ug/m3)",
    "08_PM10 (ug/m3)",
    "08_Ambient_Temperature (degC)",
]
# Select, average onto a 10-minute grid, and take a quick look.
df8 = df8[var8].resample('10min').mean()
df8.plot(subplots=True)
array([<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
dtype=object)
# Write the sensor-08 table with three decimals.
df8.to_csv(Path(TabDir, 'df8.csv'), float_format="%.3f")
Import data
# Column labels for the sensor-09 export, in file order.
headings = [
    # time stamps
    "uploadtime",
    "uploadtime_rounded",
    "sensortime",
    # gases
    "09_NO2 (ppm)",
    "09_O3 (ppm)",
    "09_SO2 (ppm)",
    "09_CO (ppm)",
    # pressure and air-quality indices
    "09_Pressure (hPa)",
    "09_TVOC (ppb)",
    "09_eCO2 (ppm)",
    # temperature and humidity
    "09_Internal_Temperature (degC)",
    "09_Ambient_Temperature (degC)",
    "09_Internal_Relative_Humidity (%)",
    "09_Ambient_Relative_Humidity (%)",
    # particulate matter
    "09_PM1 (ug/m3)",
    "09_PM2.5 (ug/m3)",
    "09_PM10 (ug/m3)",
    # position and upload method
    "09_Longitude",
    "09_Latitude",
    "09_Altitude",
    "09_Method",
]
# Load the single sensor-09 export, labelling the columns with `headings`.
df9 = pd.read_csv(Path(DataDir/'09_SimplicityV1_CCSENV020'/'20211029_1039_09_SimplicityV1_CCSENV020_DK.csv.csv'),skiprows=1, names=headings)
# 'sensortime' is unreliable for this unit, so 'uploadtime' is the time base.
# Unparseable stamps become NaT.
df9['uploadtime'] = pd.to_datetime(df9['uploadtime'], errors='coerce')
# Keep the first row for each upload time.
df9 = df9[~df9['uploadtime'].duplicated()]
# Index by upload time, ordered, with the local time zone attached.
df9 = df9.set_index('uploadtime').sort_index()
df9.index = df9.index.tz_localize('Africa/Johannesburg')
df9.index.name = 'Date'
# Average onto a 5-minute grid. The text columns ('uploadtime_rounded',
# 'sensortime', '09_Method') cannot be averaged, and pandas >= 2.0 raises on
# them instead of dropping them, so restrict to numeric columns first.
df9 = df9.select_dtypes(include='number').resample('5min').mean()
# Limit the dataset to start and end times of the campaign.
StartTime = '2021-04-26'
EndTime = '2021-10-29'
# Copy so the new columns below are written to an independent frame.
df9 = df9.loc[StartTime:EndTime].copy()
# Create converted (ppm to ppb) variables for NO2, O3 and SO2.
df9['09_NO2 (ppb)'] = df9['09_NO2 (ppm)'] * 1000
df9['09_O3 (ppb)'] = df9['09_O3 (ppm)'] * 1000
df9['09_SO2 (ppb)'] = df9['09_SO2 (ppm)'] * 1000
# Inspect which columns are present after averaging and conversion.
df9.columns
Index(['09_NO2 (ppm)', '09_O3 (ppm)', '09_SO2 (ppm)', '09_CO (ppm)',
'09_Pressure (hPa)', '09_TVOC (ppb)', '09_eCO2 (ppm)',
'09_Internal_Temperature (degC)', '09_Ambient_Temperature (degC)',
'09_Internal_Relative_Humidity (%)', '09_Ambient_Relative_Humidity (%)',
'09_PM1 (ug/m3)', '09_PM2.5 (ug/m3)', '09_PM10 (ug/m3)', '09_Longitude',
'09_Latitude', '09_Altitude', '09_NO2 (ppb)', '09_O3 (ppb)',
'09_SO2 (ppb)'],
dtype='object')
# Columns kept for sensor 09, in output order (ppm gas columns are replaced
# by their ppb counterparts; position columns are left out).
var09 = [
    '09_CO (ppm)',
    '09_Pressure (hPa)',
    '09_TVOC (ppb)',
    '09_eCO2 (ppm)',
    '09_Internal_Temperature (degC)',
    '09_Ambient_Temperature (degC)',
    '09_Internal_Relative_Humidity (%)',
    '09_Ambient_Relative_Humidity (%)',
    '09_PM1 (ug/m3)',
    '09_PM2.5 (ug/m3)',
    '09_PM10 (ug/m3)',
    '09_NO2 (ppb)',
    '09_O3 (ppb)',
    '09_SO2 (ppb)',
]
df9 = df9.loc[:, var09]
# Quick view, one panel per column.
df9.plot(subplots=True)
array([<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
dtype=object)
# Write the sensor-09 table with three decimals.
df9.to_csv(Path(TabDir, 'df9.csv'), float_format="%.3f")
Import data
# Column labels for the sensor-10 export, in file order.
headings = [
    # time stamps
    "uploadtime",
    "uploadtime_rounded",
    "sensortime",
    # gases
    "10_NO2 (ppm)",
    "10_O3 (ppm)",
    "10_SO2 (ppm)",
    "10_CO (ppm)",
    # pressure and air-quality indices
    "10_Pressure (hPa)",
    "10_TVOC (ppb)",
    "10_eCO2 (ppm)",
    # temperature and humidity
    "10_Internal_Temperature (degC)",
    "10_Ambient_Temperature (degC)",
    "10_Internal_Relative_Humidity (%)",
    "10_Ambient_Relative_Humidity (%)",
    # particulate matter
    "10_PM1 (ug/m3)",
    "10_PM2.5 (ug/m3)",
    "10_PM10 (ug/m3)",
    # position and upload method
    "10_Longitude",
    "10_Latitude",
    "10_Altitude",
    "10_Method",
]
# Load the single sensor-10 export, labelling the columns with `headings`.
df10 = pd.read_csv(
    DataDir/'10_SimplicityV1_CCSENV011/20211029_1039_10_SimplicityV1_CCSENV011_DK.csv.csv',
    skiprows=1,
    names=headings,
).sort_index()
# Parse both time columns; unparseable stamps become NaT.
for time_col in ('sensortime', 'uploadtime'):
    df10[time_col] = pd.to_datetime(df10[time_col], errors='coerce')
# Where the sensor time is missing, fall back to the upload time.
df10['sensortime'] = df10['sensortime'].fillna(df10['uploadtime'])
# Keep the first row for each sensor time and use it as the ordered index.
df10 = df10[~df10['sensortime'].duplicated()]
df10 = df10.set_index('sensortime').sort_index()
df10.index = df10.index.tz_localize('Africa/Johannesburg')
df10.index.name = 'Date'
# Limit the dataset to start and end times of the campaign.
StartTime = '2021-04-26'
EndTime = '2021-10-29'
df10 = df10[StartTime:EndTime]
# Add ppb versions (ppm * 1000) of NO2, O3 and SO2.
df10 = df10.assign(**{
    '10_NO2 (ppb)': df10['10_NO2 (ppm)'] * 1000,
    '10_O3 (ppb)': df10['10_O3 (ppm)'] * 1000,
    '10_SO2 (ppb)': df10['10_SO2 (ppm)'] * 1000,
})
# Columns kept for sensor 10, in output order.
var10 = [
    '10_CO (ppm)',
    '10_Pressure (hPa)',
    '10_TVOC (ppb)',
    '10_eCO2 (ppm)',
    '10_Internal_Temperature (degC)',
    '10_Ambient_Temperature (degC)',
    '10_Internal_Relative_Humidity (%)',
    '10_Ambient_Relative_Humidity (%)',
    '10_PM1 (ug/m3)',
    '10_PM2.5 (ug/m3)',
    '10_PM10 (ug/m3)',
    '10_NO2 (ppb)',
    '10_O3 (ppb)',
    '10_SO2 (ppb)',
]
# Select, order by time and average onto a 5-minute grid.
df10 = df10[var10].sort_index().resample('5min').mean()
df10.plot(subplots=True)
array([<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
dtype=object)
# Blank NO2 readings below 0.001 or above 5000 ppb, then save with three decimals.
no2_10 = df10['10_NO2 (ppb)']
df10.loc[(no2_10 < 0.001) | (no2_10 > 5000), '10_NO2 (ppb)'] = nan
df10.to_csv(Path(TabDir, 'df10.csv'), float_format="%.3f")
# Column labels for the sensor-11 export, in file order. The export is
# expected to carry these columns:
# Upload_Time, Ut_Rounded, Sensor_Time, NO2_PPM, O3_PPM, SO2_PPM, CO_PPM,
# Pressure_hPa, TVOC_PPB, eCO2_PPM, Internal_Temperature, External_Temperature,
# Internal_Humidity, External_Humidity, PM1_0 ug/m^3, PM2_5 ug/m^3, PM10 ug/m^3,
# Longitude, Latitude, Altitude, Upload_Method
headings = [
    # time stamps
    "uploadtime",
    "uploadtime_rounded",
    "sensortime",
    # gases
    "11_NO2 (ppm)",
    "11_O3 (ppm)",
    "11_SO2 (ppm)",
    "11_CO (ppm)",
    # pressure and air-quality indices
    "11_Pressure (hPa)",
    "11_TVOC (ppb)",
    "11_eCO2 (ppm)",
    # temperature and humidity
    "11_Internal_Temperature (degC)",
    "11_Ambient_Temperature (degC)",
    "11_Internal_Relative_Humidity (%)",
    "11_Ambient_Relative_Humidity (%)",
    # particulate matter
    "11_PM1 (ug/m3)",
    "11_PM2.5 (ug/m3)",
    "11_PM10 (ug/m3)",
    # position and upload method
    "11_Longitude",
    "11_Latitude",
    "11_Altitude",
    "11_Method",
]
Import data
# Load the single sensor-11 export, labelling the columns with `headings`.
df11 = pd.read_csv(DataDir/'11_SimplicityV2_IMTAQS0001/20211029_1040_11_SimplicityV2_IMTAQS0001_DK.csv', skiprows=1, names=headings)
# Parse both time columns; unparseable stamps become NaT.
df11['sensortime'] = pd.to_datetime(df11['sensortime'], errors='coerce')
df11['uploadtime'] = pd.to_datetime(df11['uploadtime'], errors='coerce')
# Fill bad (missing) 'sensortime' rows with 'uploadtime' dates.
df11.loc[df11['sensortime'].isna(), 'sensortime'] = df11['uploadtime']
# Keep the first row for each sensor time and use it as the ordered index.
df11 = df11[~df11['sensortime'].duplicated()]
df11 = df11.set_index('sensortime').sort_index()
df11.index = df11.index.tz_localize('Africa/Johannesburg')
df11.index.name = 'Date'
# Limit the dataset to start and end times of the campaign; copy so the new
# columns below are written to an independent frame.
StartTime = '2021-04-26'
EndTime = '2021-10-29'
df11 = df11.loc[StartTime:EndTime].copy()
# Create converted (ppm to ppb) variables for NO2, O3 and SO2.
df11['11_NO2 (ppb)'] = df11['11_NO2 (ppm)'] * 1000
df11['11_O3 (ppb)'] = df11['11_O3 (ppm)'] * 1000
df11['11_SO2 (ppb)'] = df11['11_SO2 (ppm)'] * 1000
# Columns kept for sensor 11, in output order.
var11 = [
    '11_CO (ppm)',
    '11_Pressure (hPa)',
    '11_TVOC (ppb)',
    '11_eCO2 (ppm)',
    '11_Internal_Temperature (degC)',
    '11_Ambient_Temperature (degC)',
    '11_Internal_Relative_Humidity (%)',
    '11_Ambient_Relative_Humidity (%)',
    '11_PM1 (ug/m3)',
    '11_PM2.5 (ug/m3)',
    '11_PM10 (ug/m3)',
    '11_NO2 (ppb)',
    '11_O3 (ppb)',
    '11_SO2 (ppb)',
]
# Select the output columns BEFORE averaging: the frame still holds text
# columns ('uploadtime_rounded', '11_Method') that cannot be averaged, and
# pandas >= 2.0 raises on them instead of dropping them.
df11 = df11[var11].resample('5min').mean()
# Blank all columns inside windows judged to hold unrealistic values.
df11_bad_windows = [
    ('2021-10-21 16:00:00', '2021-10-21 17:00:00'),
    ('2021-10-05 14:00:00', '2021-10-05 15:00:00'),
    ('2021-10-02 00:00:00', '2021-10-02 03:00:00'),
    ('2021-09-25 00:00:00', '2021-09-29 00:00:00'),
    ('2021-08-24 00:00:00', '2021-08-24 01:00:00'),
    ('2021-10-21 17:00:00', '2021-10-21 19:00:00'),
]
for window_start, window_end in df11_bad_windows:
    df11.loc[window_start:window_end] = nan
Quick view
# Quick view of the sensor-11 frame, one panel per column.
df11.plot(subplots=True)
array([<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
dtype=object)
# Write the sensor-11 table with three decimals.
df11.to_csv(Path(TabDir, 'df11.csv'), float_format="%.3f")
# Column labels for the sensor-12 export, in file order.
headings = [
    # time stamps
    "uploadtime",
    "uploadtime_rounded",
    "sensortime",
    # gases
    "12_NO2 (ppm)",
    "12_O3 (ppm)",
    "12_SO2 (ppm)",
    "12_CO (ppm)",
    # pressure and air-quality indices
    "12_Pressure (hPa)",
    "12_TVOC (ppb)",
    "12_eCO2 (ppm)",
    # temperature and humidity
    "12_Internal_Temperature (degC)",
    "12_Ambient_Temperature (degC)",
    "12_Internal_Relative_Humidity (%)",
    "12_Ambient_Relative_Humidity (%)",
    # particulate matter
    "12_PM1 (ug/m3)",
    "12_PM2.5 (ug/m3)",
    "12_PM10 (ug/m3)",
    # position and upload method
    "12_Longitude",
    "12_Latitude",
    "12_Altitude",
    "12_Method",
]
# Load the single sensor-12 export and order it by its index.
df12 = pd.read_csv(
    DataDir/'12_SimplicityV2_IMTAQS0002/20211029_1040_12_SimplicityV2_IMTAQS0002_DK.csv',
    skiprows=1,
    names=headings,
).sort_index()
Import data
# Parse both time columns; unparseable stamps become NaT.
df12['sensortime'] = pd.to_datetime(df12['sensortime'], errors='coerce')
df12['uploadtime'] = pd.to_datetime(df12['uploadtime'], errors='coerce')
# Fill bad (missing) 'sensortime' rows with 'uploadtime' dates.
df12.loc[df12['sensortime'].isna(), 'sensortime'] = df12['uploadtime']
# Keep the first row for each sensor time and use it as the ordered index.
df12 = df12[~df12['sensortime'].duplicated()]
df12 = df12.set_index('sensortime').sort_index()
df12.index = df12.index.tz_localize('Africa/Johannesburg')
df12.index.name = 'Date'
# Limit the dataset to start and end times of the campaign; copy so the new
# columns below are written to an independent frame.
StartTime = '2021-04-26'
EndTime = '2021-10-29'
df12 = df12.loc[StartTime:EndTime].copy()
# Create converted (ppm to ppb) variables for NO2, O3 and SO2.
df12['12_NO2 (ppb)'] = df12['12_NO2 (ppm)'] * 1000
df12['12_O3 (ppb)'] = df12['12_O3 (ppm)'] * 1000
df12['12_SO2 (ppb)'] = df12['12_SO2 (ppm)'] * 1000
# Columns kept for sensor 12, in output order.
var12 = [
    '12_CO (ppm)',
    '12_Pressure (hPa)',
    '12_TVOC (ppb)',
    '12_eCO2 (ppm)',
    '12_Internal_Temperature (degC)',
    '12_Ambient_Temperature (degC)',
    '12_Internal_Relative_Humidity (%)',
    '12_Ambient_Relative_Humidity (%)',
    '12_PM1 (ug/m3)',
    '12_PM2.5 (ug/m3)',
    '12_PM10 (ug/m3)',
    '12_NO2 (ppb)',
    '12_O3 (ppb)',
    '12_SO2 (ppb)',
]
# Select the output columns BEFORE averaging: the frame still holds text
# columns ('uploadtime_rounded', '12_Method') that cannot be averaged, and
# pandas >= 2.0 raises on them instead of dropping them.
df12 = df12[var12].resample('5min').mean()
# Summary of index range, column dtypes and non-null counts.
df12.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 53407 entries, 2021-04-27 00:00:00+02:00 to 2021-10-29 10:30:00+02:00 Freq: 5T Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 12_CO (ppm) 24374 non-null float64 1 12_Pressure (hPa) 24374 non-null float64 2 12_TVOC (ppb) 24374 non-null float64 3 12_eCO2 (ppm) 24374 non-null float64 4 12_Internal_Temperature (degC) 24374 non-null float64 5 12_Ambient_Temperature (degC) 24374 non-null float64 6 12_Internal_Relative_Humidity (%) 24374 non-null float64 7 12_Ambient_Relative_Humidity (%) 24374 non-null float64 8 12_PM1 (ug/m3) 24374 non-null float64 9 12_PM2.5 (ug/m3) 24374 non-null float64 10 12_PM10 (ug/m3) 24374 non-null float64 11 12_NO2 (ppb) 24374 non-null float64 12 12_O3 (ppb) 24374 non-null float64 13 12_SO2 (ppb) 24374 non-null float64 dtypes: float64(14) memory usage: 6.1 MB
# Quick view of the sensor-12 frame, one panel per column.
df12.plot(subplots=True)
array([<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
dtype=object)
# Candidate time windows with unrealistic values. The blanking is currently
# disabled (every line is commented out), so df12 is saved without it.
#df12.loc['2021-10-21 16:00:00':'2021-10-21 18:00:00']=nan
#df12.loc['2021-10-05 14:00:00':'2021-10-05 16:00:00']=nan
#df12.loc['2021-08-24 00:00:00':'2021-08-24 01:00:00']=nan
#df12.loc['2021-10-02 00:00:00':'2021-10-02 03:00:00']=nan
#df12.loc['2021-07-05 20:00:00':'2021-07-05 20:30:00']=nan
#df12.loc['2021-10-17 20:00:00':'2021-10-17 23:00:00']=nan
#df12.loc['2021-10-10 11:30:00':'2021-10-10 12:00:00']=nan
# Write the sensor-12 table with three decimals.
df12.to_csv(Path(TabDir/'df12.csv'), float_format="%.3f")
# Column labels for the sensor-13 export, in file order.
headings = [
    # time stamps
    "uploadtime",
    "uploadtime_rounded",
    "sensortime",
    # gases
    "13_NO2 (ppm)",
    "13_O3 (ppm)",
    "13_SO2 (ppm)",
    "13_CO (ppm)",
    # pressure and air-quality indices
    "13_Pressure (hPa)",
    "13_TVOC (ppb)",
    "13_eCO2 (ppm)",
    # temperature and humidity
    "13_Internal_Temperature (degC)",
    "13_Ambient_Temperature (degC)",
    "13_Internal_Relative_Humidity (%)",
    "13_Ambient_Relative_Humidity (%)",
    # particulate matter
    "13_PM1 (ug/m3)",
    "13_PM2.5 (ug/m3)",
    "13_PM10 (ug/m3)",
    # position and upload method
    "13_Longitude",
    "13_Latitude",
    "13_Altitude",
    "13_Method",
]
# Load the single sensor-13 export and order it by its index.
df13 = pd.read_csv(
    DataDir/'13_SimplicityV2_IMTAQS0003/20211029_1040_13_SimplicityV2_IMTAQS0003_DK.csv',
    skiprows=1,
    names=headings,
).sort_index()
Import data
# Parse both time columns; unparseable stamps become NaT.
for time_col in ('sensortime', 'uploadtime'):
    df13[time_col] = pd.to_datetime(df13[time_col], errors='coerce')
# Where the sensor time is missing, fall back to the upload time.
df13['sensortime'] = df13['sensortime'].fillna(df13['uploadtime'])
# Keep the first row for each sensor time and use it as the ordered index.
df13 = df13[~df13['sensortime'].duplicated()]
df13 = df13.set_index('sensortime').sort_index()
df13.index = df13.index.tz_localize('Africa/Johannesburg')
df13.index.name = 'Date'
# Limit the dataset to start and end times of the campaign.
StartTime = '2021-04-26'
EndTime = '2021-10-29'
df13 = df13[StartTime:EndTime]
# Add ppb versions (ppm * 1000) of NO2, O3 and SO2.
df13 = df13.assign(**{
    '13_NO2 (ppb)': df13['13_NO2 (ppm)'] * 1000,
    '13_O3 (ppb)': df13['13_O3 (ppm)'] * 1000,
    '13_SO2 (ppb)': df13['13_SO2 (ppm)'] * 1000,
})
# Columns kept for sensor 13, in output order.
var13 = [
    '13_CO (ppm)',
    '13_Pressure (hPa)',
    '13_TVOC (ppb)',
    '13_eCO2 (ppm)',
    '13_Internal_Temperature (degC)',
    '13_Ambient_Temperature (degC)',
    '13_Internal_Relative_Humidity (%)',
    '13_Ambient_Relative_Humidity (%)',
    '13_PM1 (ug/m3)',
    '13_PM2.5 (ug/m3)',
    '13_PM10 (ug/m3)',
    '13_NO2 (ppb)',
    '13_O3 (ppb)',
    '13_SO2 (ppb)',
]
# NOTE(review): unlike sensors 09-12 and 14, this frame is not resampled to a
# 5-minute grid - confirm whether that is intended.
df13 = df13[var13]
# Plot the gas variables, one panel each, labelled with the column name.
fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, sharex=True, figsize=(7, 6))
for gas_axis, gas_col, gas_colour in zip(
    (ax1, ax2, ax3, ax4),
    ('13_O3 (ppb)', '13_CO (ppm)', '13_NO2 (ppb)', '13_SO2 (ppb)'),
    ('blue', 'orange', 'purple', 'red'),
):
    df13[[gas_col]].plot(ax=gas_axis, color=gas_colour)
    gas_axis.legend([gas_col])
<matplotlib.legend.Legend at 0x7f4e64d33880>
# Plot particulate matter variables, one panel each.
fig, [ax1, ax2] = plt.subplots(2, 1, sharex=True, figsize=(7,6))
df13[['13_PM2.5 (ug/m3)']].plot(ax=ax1, color='blue')
df13[['13_PM10 (ug/m3)']].plot(ax=ax2, color='orange')
# Label each panel with its column name (the PM10 label was missing its
# closing parenthesis).
ax1.legend(["13_PM2.5 (ug/m3)"])
ax2.legend(["13_PM10 (ug/m3)"])
<matplotlib.legend.Legend at 0x7f4e64296050>
# Blank all columns inside windows judged to hold unrealistic values.
for window_start, window_end in (
    ('2021-10-21 16:00:00', '2021-10-21 17:00:00'),
    ('2021-10-05 14:30:00', '2021-10-05 14:45:00'),
    ('2021-10-02 00:00:00', '2021-10-02 03:00:00'),
    ('2021-08-24 00:00:00', '2021-08-24 01:00:00'),
):
    df13.loc[window_start:window_end] = nan
# Re-plot the gas variables after cleaning, labelled with the column name.
fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, sharex=True, figsize=(7, 6))
for gas_axis, gas_col, gas_colour in zip(
    (ax1, ax2, ax3, ax4),
    ('13_O3 (ppb)', '13_CO (ppm)', '13_NO2 (ppb)', '13_SO2 (ppb)'),
    ('blue', 'orange', 'purple', 'red'),
):
    df13[[gas_col]].plot(ax=gas_axis, color=gas_colour)
    gas_axis.legend([gas_col])
<matplotlib.legend.Legend at 0x7f4e63b92530>
# Write the sensor-13 table with three decimals.
df13.to_csv(Path(TabDir, 'df13.csv'), float_format="%.3f")
# Column labels for the sensor-14 export, in file order.
headings = [
    # time stamps
    "uploadtime",
    "uploadtime_rounded",
    "sensortime",
    # gases
    "14_NO2 (ppm)",
    "14_O3 (ppm)",
    "14_SO2 (ppm)",
    "14_CO (ppm)",
    # pressure and air-quality indices
    "14_Pressure (hPa)",
    "14_TVOC (ppb)",
    "14_eCO2 (ppm)",
    # temperature and humidity
    "14_Internal_Temperature (degC)",
    "14_Ambient_Temperature (degC)",
    "14_Internal_Relative_Humidity (%)",
    "14_Ambient_Relative_Humidity (%)",
    # particulate matter
    "14_PM1 (ug/m3)",
    "14_PM2.5 (ug/m3)",
    "14_PM10 (ug/m3)",
    # position and upload method
    "14_Longitude",
    "14_Latitude",
    "14_Altitude",
    "14_Method",
]
# Load the single sensor-14 export and order it by its index.
df14 = pd.read_csv(
    DataDir/'14_SimplicityV2_IMTAQS0004/20211029_1040_14_SimplicityV2_IMTAQS0004_DK.csv',
    skiprows=1,
    names=headings,
).sort_index()
Import data
# Parse both time columns; unparseable stamps become NaT.
for time_col in ('sensortime', 'uploadtime'):
    df14[time_col] = pd.to_datetime(df14[time_col], errors='coerce')
# Where the sensor time is missing, fall back to the upload time.
df14['sensortime'] = df14['sensortime'].fillna(df14['uploadtime'])
# Keep the first row for each sensor time and use it as the ordered index.
df14 = df14[~df14['sensortime'].duplicated()]
df14 = df14.set_index('sensortime').sort_index()
df14.index = df14.index.tz_localize('Africa/Johannesburg')
# Limit the dataset to start and end times of the campaign.
StartTime = '2021-04-26'
EndTime = '2021-10-29'
df14 = df14[StartTime:EndTime]
# Add ppb versions (ppm * 1000) of NO2, O3 and SO2.
df14 = df14.assign(**{
    '14_NO2 (ppb)': df14['14_NO2 (ppm)'] * 1000,
    '14_O3 (ppb)': df14['14_O3 (ppm)'] * 1000,
    '14_SO2 (ppb)': df14['14_SO2 (ppm)'] * 1000,
})
# Columns kept for sensor 14, in output order.
var14 = [
    '14_CO (ppm)',
    '14_Pressure (hPa)',
    '14_TVOC (ppb)',
    '14_eCO2 (ppm)',
    '14_Internal_Temperature (degC)',
    '14_Ambient_Temperature (degC)',
    '14_Internal_Relative_Humidity (%)',
    '14_Ambient_Relative_Humidity (%)',
    '14_PM1 (ug/m3)',
    '14_PM2.5 (ug/m3)',
    '14_PM10 (ug/m3)',
    '14_NO2 (ppb)',
    '14_O3 (ppb)',
    '14_SO2 (ppb)',
]
# Select, average onto a 5-minute grid, and name the index.
df14 = df14[var14].resample('5min').mean()
df14.index.name = 'Date'
df14.plot(subplots=True)
array([<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
dtype=object)
# Write the sensor-14 table with three decimals.
df14.to_csv(Path(TabDir, 'df14.csv'), float_format="%.3f")
Import data
# Sensor 15 export files to merge.
# NOTE(review): '20210524_1506_..._BL.csv' is listed twice, so it is read
# twice - confirm whether a different file was meant.
DataFiles = [
    '20210524_1506_15_EcomSmart_ECS-C-XXFNON-20149_BL.csv',
    '20210517_0510_15_EcomSmart_ECS-C-XXFNON-20149_LP.csv',
    '20210727_1032_15_EcomSmart_ECS-C-XXFNON-20149_AR.csv.csv',
    '20210601_1032_15_EcomSmart_ECS-C-XXFNON-20149_AR.csv.csv',
    '20210621_1028_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv',
    '20210810_1008_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv',
    '20210906_1048_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv.csv',
    '20210705_1059_15_EcomSmart_ECS-C-XXFNON-20149_AR..csv',
    '20210726_1506_15_EcomSmart_ECS-C-XXFNON-20149_AR.csv',
    '20210920_1032_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv.csv',
    '20210910_1008_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv.csv',
    '20210712_1223_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv',
    '20210524_1506_15_EcomSmart_ECS-C-XXFNON-20149_BL.csv',
    '20210816_1128_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv.csv',
    '20210830_0927_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv.csv',
    '20210823_1036_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv.csv',
    '20210913_0930_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv.csv',
    '20210719_1019_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv',
    '20210628_1015_15_EcomSmart_ECS-C-XXFNON-20149_AR.csv',
    '20210615_0415_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv',
    '20210628_1026_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv',
    '20210629_1015_15_EcomSmart_ECS-C-XXFNON-20149_AR.csv',
    '20211005_1301_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv.csv',
    '20211025_1433_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv.csv',
    '20211029_0857_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv.csv',
]
# Column labels passed to read_csv; the first one ("Date") becomes the index.
# NOTE(review): the unit labels on the two wind columns look swapped
# (direction in m/s, speed in degrees) - confirm before relabelling.
Vars = [
    "Date",
    "Date (Africa/Johannesburg)",
    "15_Main_Supply (%)",
    "15_Ambient_Temperature (degC)",
    "15_Relative_Humidity (%)",
    "15_Pressure (hPa)",
    "15_SO2 (ppb)",
    "15_NO2 (ppb)",
    "15_O3 (ppb)",
    "15_PM1 (ug/m3)",
    "15_PM2.5 (ug/m3)",
    "15_PM10 (ug/m3)",
    "15_Latitude",
    "15_Longitude",
    "15_GPS_Date (hhmmss)",
    "15_Sensor_Number",
    "15_Internal_Battery (%)",
    "15_Firmware_Revision",
    "15_Communication",
    "15_Wind_Direction (m/s)",
    "15_Winds_Speed (degrees)",
    "15_Number_Satellites",
]
# Merge the files one by one; combine_first keeps the value already held in
# df15 and only fills cells that are missing there.
df15 = pd.DataFrame()
for File in DataFiles:
    print(File)
    if not File.endswith('.csv'):
        continue
    dftmp = pd.read_csv(
        Path(DataDir / '15_ECOMSMART_ECS-C-XXFNON-20149' / File),
        sep=";",
        skiprows=1,
        parse_dates=[0],
        index_col=0,
        names=Vars,
    )
    df15 = dftmp if len(df15) == 0 else df15.combine_first(dftmp)
20210524_1506_15_EcomSmart_ECS-C-XXFNON-20149_BL.csv 20210517_0510_15_EcomSmart_ECS-C-XXFNON-20149_LP.csv 20210727_1032_15_EcomSmart_ECS-C-XXFNON-20149_AR.csv.csv 20210601_1032_15_EcomSmart_ECS-C-XXFNON-20149_AR.csv.csv 20210621_1028_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv 20210810_1008_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv 20210906_1048_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv.csv 20210705_1059_15_EcomSmart_ECS-C-XXFNON-20149_AR..csv 20210726_1506_15_EcomSmart_ECS-C-XXFNON-20149_AR.csv 20210920_1032_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv.csv 20210910_1008_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv.csv 20210712_1223_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv 20210524_1506_15_EcomSmart_ECS-C-XXFNON-20149_BL.csv 20210816_1128_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv.csv 20210830_0927_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv.csv 20210823_1036_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv.csv 20210913_0930_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv.csv 20210719_1019_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv 20210628_1015_15_EcomSmart_ECS-C-XXFNON-20149_AR.csv 20210615_0415_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv 20210628_1026_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv 20210629_1015_15_EcomSmart_ECS-C-XXFNON-20149_AR.csv 20211005_1301_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv.csv 20211025_1433_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv.csv 20211029_0857_15_EcomSmart_ECS-C-XXFNON-20149_DK.csv.csv
# Treat the naive timestamps as UTC and express them in local time.
df15.index = df15.index.tz_localize('UTC').tz_convert('Africa/Johannesburg')
# Columns kept for sensor 15, in output order.
var15 = [
    "15_Ambient_Temperature (degC)",
    "15_Relative_Humidity (%)",
    "15_Pressure (hPa)",
    "15_SO2 (ppb)",
    "15_NO2 (ppb)",
    "15_O3 (ppb)",
    "15_PM1 (ug/m3)",
    "15_PM2.5 (ug/m3)",
    "15_PM10 (ug/m3)",
    "15_Internal_Battery (%)",
    "15_Wind_Direction (m/s)",
    "15_Winds_Speed (degrees)",
]
# Select and average onto a 10-minute grid.
df15 = df15[var15].resample('10min').mean()
Quick look
# Quick view of the sensor-15 frame, one panel per column.
df15.plot(subplots=True)
array([<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
dtype=object)
# Save the sensor-15 table. `index` and `header` are to_csv options;
# Path() only takes path segments (keyword arguments are deprecated there).
df15.to_csv(Path(TabDir / 'df15.csv'), index=True, header=True, float_format="%.3f")
Import data
# NOTE(review): `s` is not used in this cell; presumably the sensor id for
# later cells - confirm.
s = '17'
# The sensor-17 files come in two layouts, told apart by their column count.
headers14=['Date','CO (ppm)','Internal Temperature','NO (ppb)',
           'NO2 (ppb)','O3 (ppb)','PM1','PM10 (ug/m3)','PM2.5 (ug/m3)','PRGM','Relative Humidity (%)',
           'Sensor Temperature','SO2 (ppb)','VOC']
headers21=['id','instrumentName','Date','Time','CO (ppm)','O3 (ppb)','NO2 (ppb)','SO2 (ppb)','NO (ppb)','VOC',
           'PM2.5 (ug/m3)','PM10 (ug/m3)','Relative Humidity (%)','pressure','sensorTemp','heaterTemp',
           'internalTemp','timeRec','UTCdate','UTCtime','alarms']
df17 = pd.DataFrame()
files = [f for f in os.listdir(DataDir/'17_ThermoGM5000_CM21035290/') if f.endswith(".csv")]
for f in files:
    fullpath = os.path.join(DataDir/'17_ThermoGM5000_CM21035290/',f)
    print('reading',f)
    dftmp = pd.read_csv(fullpath,skiprows=2,encoding='utf-8', header=None)
    if len(dftmp.columns)==14:
        # 14-column layout: date and time share the 'Date' column.
        dftmp.columns=headers14
        dftmp['d']=pd.to_datetime(dftmp['Date'], dayfirst=True)
    elif len(dftmp.columns)==21:
        # 21-column layout: date and time are separate columns.
        dftmp.columns=headers21
        dftmp['d']=pd.to_datetime(dftmp['Date']+' '+dftmp['Time'], dayfirst=True)
    else:
        # Unknown layout: without a time index the file cannot be merged.
        print('skipping', f, '- unexpected number of columns:', len(dftmp.columns))
        continue
    dftmp=dftmp.set_index('d')
    dftmp.index.names= ['Date']
    if len(df17) == 0:
        df17 = dftmp
    else:
        df17 = pd.concat([dftmp,df17],axis=0)
# Measurement columns kept for sensor 17 (present in both layouts).
var = ['CO (ppm)', 'O3 (ppb)', 'NO2 (ppb)',
       'SO2 (ppb)', 'NO (ppb)', 'PM2.5 (ug/m3)', 'PM10 (ug/m3)',
       'Relative Humidity (%)']
# Select the measurement columns first: the concatenated frame also holds text
# columns that cannot be averaged (pandas >= 2.0 raises on them).
# Sort before slicing: the files are concatenated in listing order, and date
# slicing needs a monotonic index.
df17 = df17[var].sort_index()
# Average onto a 5-minute grid over the campaign period. Rows repeated across
# overlapping downloads land in the same bin and are averaged together, so no
# separate de-duplication is needed. A value-based drop_duplicates() on the
# resampled frame would discard distinct timestamps with equal readings.
df17 = df17.loc['2021-04-26':'2021-10-29'].resample('5min').mean()
# Prefix every column with the sensor number.
df17 = df17.rename(columns={'CO (ppm)':'17_CO (ppm)', 'O3 (ppb)':'17_O3 (ppb)', 'NO2 (ppb)':'17_NO2 (ppb)', 'SO2 (ppb)':'17_SO2 (ppb)',
                            'NO (ppb)':'17_NO (ppb)','PM2.5 (ug/m3)':'17_PM2.5 (ug/m3)',
                            'PM10 (ug/m3)':'17_PM10 (ug/m3)', 'Relative Humidity (%)':'17_Relative Humidity (%)'})
reading 20210517_1145_17_GM5000_CM21035290_DK.csv reading 20210810_0739_17_GM5000_CM21035290_DK.csv reading 20210830_0933_17_GM5000_CM21035290_DK.csv reading 20210629_0839_17_GM5000_CM21035290_GB.csv reading 20210524_1145_17_GM5000_CM21035290_GB.csv reading 20210816_0641_17_GM5000__E_CM21035290_GB.csv reading 20211029_1012_17_GM5000_CM21035290_GB.csv reading 20210409_1033_17_GM5000_CM21035290_GB.csv reading 20210421_1239_17_GM5000_CM21035290_GB.csv reading 20210913_0935_17_GM5000_CM21035290_DK.csv reading 20210920_1012_17_GM5000_CM21035290_DK.csv reading 20210823_1016_17_GM5000_CM21035290_DK.csv reading 20210705_1120_17_GM5000_CM21035290_DK.csv reading 20210512_0700_17_GM5000_CM21035290_GB.csv reading 20210621_1459_17_GM5000_CM21035290_DK.csv reading 20210906_1004_17_GM5000_CM21035290_DK.csv reading 20210823_0641_17_GM5000__E_CM21035290_GB.csv reading 20210726_1033_17_GM5000_CM21035290_AR.csv reading 20210719_0928_17_GM5000_CM21035290_DK.csv reading 20210426_1239_17_GM5000_CM21035290_GB.csv reading 20210719_0641_17_GM5000_CM21035290_GB.csv reading 20210601_1000_17_GM5000_CM21035290_GB.csv reading 20210816_1037_17_GM5000_CM21035290_DK.csv
# Rebuild the regular 5-minute grid and mark the naive timestamps as South African local time.
df17 = df17.resample('5min').mean().tz_localize('Africa/Johannesburg')
Set limits
# Range screening for instrument 17: readings outside the accepted range become NaN.
#df17.loc['2021-05-01']=nan
# PM10 is the only variable screened with a lower as well as an upper bound.
pm10_17 = df17['17_PM10 (ug/m3)']
df17.loc[(pm10_17 < 0.001) | (pm10_17 > 400), '17_PM10 (ug/m3)'] = nan
# All other variables are screened against an upper bound only.
upper_limits17 = {
    '17_PM2.5 (ug/m3)': 95,
    '17_NO2 (ppb)': 2000,
    '17_SO2 (ppb)': 2000,
    '17_O3 (ppb)': 2000,
    '17_CO (ppm)': 1000,
}
for column17, ceiling17 in upper_limits17.items():
    df17.loc[df17[column17] > ceiling17, column17] = nan
Quick look
# Visual sanity check: draw every df17 column in its own subplot.
df17.plot(subplots=True)
array([<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
dtype=object)
Save
# Save the processed df17 table as CSV (index and header written, floats with 3 decimals).
# index/header are options of to_csv, not of Path(), so they are passed to to_csv.
df17.to_csv(Path(TabDir/'df17.csv'), index=True, header=True, float_format="%.3f")
Import Data
# Exports of instrument 18 (Sensors.Africa Plantower node 108) to load.
DataFiles = [
    '20210730_1016_18_SensorsAfricaPlantower_108_AR(N).csv',
    '20210630_1016_18_SensorsAfricaPlantower_108_AR(N).csv',
    '20210926_1000_18_SensorsAfricaPlantower_108_AR(N).csv',
    '20210530_1047_18_SensorsAfricaPlantower_108_AR(N).csv',
    '20210830_1000_18_SensorsAfricaPlantower_108_AR(N).csv',
    '20211029_1000_18_SensorsAfricaPlantower_108_AR(N).csv',
    '20210931_1000_18_SensorsAfricaPlantower_108_AR(N).csv',
]
# Column names, in file order; they replace the header row of each file (skiprows=1).
Vars = ['sensor_id', 'sensor_type', 'location', 'lat', 'lon',
        'timestamp', 'value_type', 'value']
# Read each export with 'timestamp' parsed as the index and merge them;
# values already collected take precedence over later files (combine_first).
df18 = pd.DataFrame()
for File in DataFiles:
    print(File)
    if not File.endswith('.csv'):
        continue
    csv_path = Path(DataDir/'18_SensorsAfricaPlantower_108'/File)
    dftmp = pd.read_csv(csv_path, skiprows = 1, parse_dates=['timestamp'], index_col="timestamp", names=Vars)
    df18 = dftmp if len(df18) == 0 else df18.combine_first(dftmp)
df18.index.name = 'Date'
20210730_1016_18_SensorsAfricaPlantower_108_AR(N).csv 20210630_1016_18_SensorsAfricaPlantower_108_AR(N).csv 20210926_1000_18_SensorsAfricaPlantower_108_AR(N).csv 20210530_1047_18_SensorsAfricaPlantower_108_AR(N).csv 20210830_1000_18_SensorsAfricaPlantower_108_AR(N).csv 20211029_1000_18_SensorsAfricaPlantower_108_AR(N).csv 20210931_1000_18_SensorsAfricaPlantower_108_AR(N).csv
# Express the timestamps in South African local time (tz_convert needs a tz-aware index).
df18.index = df18.index.tz_convert('Africa/Johannesburg')
# Long -> wide: one column per value_type, keeping the first value reported per timestamp,
# then average onto a 5-minute grid. This is done once; resampling the same grid a second
# time with the same rule would not change anything.
df18 = pd.pivot_table(df18, values='value', index=['Date'], columns=['value_type'], aggfunc='first').resample('5min').mean()
# Visual sanity check: one subplot per variable.
df18.plot(subplots = True)
array([<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>], dtype=object)
Rename
# Give the instrument-18 columns the naming scheme shared by the other instruments.
df18 = df18.rename(columns={'humidity': '18_Relative_Humidity (%)',
                            'temperature': '18_Ambient_Temperature (degC)',
                            'P0': '18_PM1 (ug/m3)',
                            'P2': '18_PM2.5 (ug/m3)',
                            'P1': '18_PM10 (ug/m3)'
                            })
# Save as CSV. index/header are options of to_csv, not of Path(), so they are passed to to_csv.
df18.to_csv(Path(TabDir/'df18.csv'), index=True, header=True, float_format="%.3f")
# Folder holding the exports of instrument 20 (Earthsense Zephyr 642-SA).
Zeph20 = Path(DataDir/'20_EarthsenseZephyr_642-SA')
Import data
# Peek at one raw export to see its header and layout.
# NOTE(review): head() is not defined in this part of the file; presumably it prints the first lines of the file.
head(Zeph20/'20210615_1302_20_Zephyr_642-SA_DK.csv')
Timestamp(UTC),Timestamp(UTS),Timestamp(Local),642- SA-Latitude,642- SA-Longitude,642- SA-Temp(C)-slotA,642- SA-Humidity(%RH)-slotA,642- SA-NO2(ug/m3)-slotA,642- SA-O3(ug/m3)-slotA,642- SA-NO(ug/m3)-slotA,642- SA-SO2(ug/m3)-slotA,642- SA-PM1(ug/m3)-slotA,642- SA-PM2.5(ug/m3)-slotA,642- SA-PM10(ug/m3)-slotA,642- SA-CO(mg/m3)-slotA,642- SA-H2S(ug/m3)-slotA,642- SA-Ambient temp(C)-slotA,642- SA-Ambient humidity(%RH)-slotA,642- SA-Ambient pressure(hPa)-slotA "2021-05-23T22:00:02+00:00",1621807202,"2021-05-24T00:00:02+0200",-26.724667,27.884808,8,82,35.65,0,5.82,0,30.64,37.87,48,3.62,16.39,8,81,869.3 "2021-05-23T22:00:12+00:00",1621807212,"2021-05-24T00:00:12+0200",-26.724667,27.884808,8,82,37.66,0,26.83,0,26.36,36.76,45.82,3.56,16.49,8,81,869.3 "2021-05-23T22:00:22+00:00",1621807222,"2021-05-24T00:00:22+0200",-26.724667,27.884808,8,82,35.66,0,54.37,0,27.79,36.76,45.82,3.06,14.89,8,81,869.3 "2021-05-23T22:00:32+00:00",1621807232,"2021-05-24T00:00:32+0200",-26.724667,27.884808,8,82,31.9,0,24.18,0,26.36,35.64,45.86,2.12,12.89,8,81,869.3 "2021-05-23T22:00:42+00:00",1621807242,"2021-05-24T00:00:42+0200",-26.724667,27.884808,8,82,31.68,0,56.01,0,26.36,36.85,47.53,3.65,11.74,8,81,869.3 "2021-05-23T22:00:52+00:00",1621807252,"2021-05-24T00:00:52+0200",-26.724667,27.884807,8,82,29.02,0,63.15,0,27.08,36.85,47.53,2.04,9.16,8,81,869.3 "2021-05-23T22:01:02+00:00",1621807262,"2021-05-24T00:01:02+0200",-26.724667,27.884807,8,82,27.7,0,72.12,0,27.79,38.16,49.8,2.88,8.09,8,81,869.4 "2021-05-23T22:01:12+00:00",1621807272,"2021-05-24T00:01:12+0200",-26.724667,27.884807,8,82,28.16,0,37.03,0,27.79,37.65,48.73,4.05,8.25,8,81,869.3 "2021-05-23T22:01:22+00:00",1621807282,"2021-05-24T00:01:22+0200",null,null,8,82,27.73,0,43.77,0,26.36,36.53,48.21,4.48,8.73,8,81,869.4
# Column names applied to every Zephyr 642-SA export, replacing the file's own header row.
Var = ['Timestamp',
       'Timestamp(UTS)',
       'Timestamp(Local)',
       '20_Latitude',
       '20_Longitude',
       '20_Internal_Temerature (degC)',
       '20_Internal_Relative_Humidity (%)',
       '20_NO2 (ug/m3)',
       '20_O3 (ug/m3)',
       '20_NO (ug/m3)',
       '20_SO2 (ug/m3)',
       '20_PM1 (ug/m3)',
       '20_PM2.5 (ug/m3)',
       '20_PM10 (ug/m3)',
       '20_CO (mg/m3)',
       '20_H2S (ug/m3)',
       '20_Ambient_Temperature (degC)',
       '20_Relative_Humidity (%)',
       '20_Pressure (hPa)']
files = os.listdir(Zeph20)
df20 = pd.DataFrame()
for n, f in enumerate(files, start=1):
    print("{} of {}: ".format(n,len(files)),end="")
    fullpath = os.path.join(Zeph20,f)
    # Only CSV exports whose name starts with "2021" are read; other entries (e.g. .xlsx) are skipped.
    if f.endswith(".csv") and f.startswith("2021"):
        print("Reading ",f)
        dftmp = pd.read_csv(fullpath,header=0,names=Var,na_values=["NAN","NaN",'undefined'],parse_dates=[0], index_col=0)
        # Test the accumulator, not the file just read: checking dftmp here would replace
        # everything collected so far with an empty frame whenever an export has no rows.
        if len(df20) == 0:
            df20 = dftmp
        else:
            # Values already collected win; the new file only fills gaps.
            df20 = df20.combine_first(dftmp)
    print("Finished with {}".format(f))
1 of 25: Reading 20210615_1302_20_Zephyr_642-SA_DK.csv Finished with 20210615_1302_20_Zephyr_642-SA_DK.csv 2 of 25: Reading 20210524_1100_20_Zypher_642-SA_VW.csv Finished with 20210524_1100_20_Zypher_642-SA_VW.csv 3 of 25: Reading 20210621_1120_20_Zypher_642-SA_VW.csv Finished with 20210621_1120_20_Zypher_642-SA_VW.csv 4 of 25: Reading 20210503_1300_20_Zypher_642-SA_VW.csv Finished with 20210503_1300_20_Zypher_642-SA_VW.csv 5 of 25: Reading 20210426_1232_20_Zephyr_642-SA_AR.csv Finished with 20210426_1232_20_Zephyr_642-SA_AR.csv 6 of 25: Reading 20210816_1124_20_Zephyr_642-SA_DK.csv Finished with 20210816_1124_20_Zephyr_642-SA_DK.csv 7 of 25: Reading 20210511_0930_20_Zypher_642-SA_VW.csv Finished with 20210511_0930_20_Zypher_642-SA_VW.csv 8 of 25: Reading 20210810_1003_20_Zephyr_642-SA_DK.csv Finished with 20210810_1003_20_Zephyr_642-SA_DK.csv 9 of 25: Reading 20210906_1048_20_Zephyr_642-SA_DK.csv Finished with 20210906_1048_20_Zephyr_642-SA_DK.csv 10 of 25: Reading 20210517_1429_20_Zephyr_642-SA_LP.csv Finished with 20210517_1429_20_Zephyr_642-SA_LP.csv 11 of 25: Reading 20211025_1431_20_Zephyr_642-SA_DK.csv Finished with 20211025_1431_20_Zephyr_642-SA_DK.csv 12 of 25: Reading 20210920_1031_20_Zephyr_642-SA_DK.csv Finished with 20210920_1031_20_Zephyr_642-SA_DK.csv 13 of 25: Finished with 20211029_0853_20_Zephyr_642-SA_DK.xlsx 14 of 25: Reading 20210728_0917_20_Zypher_642-SA_VW.csv Finished with 20210728_0917_20_Zypher_642-SA_VW.csv 15 of 25: Reading 20210712_1200_20_Zephyr_642-SA_DK.csv Finished with 20210712_1200_20_Zephyr_642-SA_DK.csv 16 of 25: Reading 20211005_1257_20_Zephyr_642-SA_DK.csv Finished with 20211005_1257_20_Zephyr_642-SA_DK.csv 17 of 25: Reading 20210705_1022_20_Zephyr_642-SA_DK.csv Finished with 20210705_1022_20_Zephyr_642-SA_DK.csv 18 of 25: Reading 20210913_0925_20_Zephyr_642-SA_DK.csv Finished with 20210913_0925_20_Zephyr_642-SA_DK.csv 19 of 25: Reading 20210719_1023_20_Zephyr_642-SA_DK.csv Finished with 20210719_1023_20_Zephyr_642-SA_DK.csv 
20 of 25: Reading 20210823_1026_20_Zephyr_642-SA_DK.csv Finished with 20210823_1026_20_Zephyr_642-SA_DK.csv 21 of 25: Reading 20211029_0853_20_Zephyr_642-SA_DK.csv Finished with 20211029_0853_20_Zephyr_642-SA_DK.csv 22 of 25: Reading 20210726_1026_20_Zypher_642-SA_AR.csv Finished with 20210726_1026_20_Zypher_642-SA_AR.csv 23 of 25: Reading 20210629_0917_20_Zypher_642-SA_VW.csv Finished with 20210629_0917_20_Zypher_642-SA_VW.csv 24 of 25: Reading 20210830_0925_20_Zephyr_642-SA_DK.csv Finished with 20210830_0925_20_Zephyr_642-SA_DK.csv 25 of 25: Reading 20210429_1120_20_Zypher_642-SA_VW.csv Finished with 20210429_1120_20_Zypher_642-SA_VW.csv
# Parse the index as UTC timestamps (unparseable entries become NaT) and express it
# in South African local time.
df20.index = pd.to_datetime(df20.index,utc=True,errors='coerce')
df20.index = df20.index.tz_convert('Africa/Johannesburg')
df20.index.name = 'Date'
# Average onto a 10-minute grid. numeric_only=True because the frame still contains the
# text column 'Timestamp(Local)', which cannot be averaged.
df20 = df20.resample('10min').mean(numeric_only=True)
# Keep data from 2021-04-26 onwards.
df20 = df20['2021-04-26':]
# Keep only the measurement columns used further on.
df20=df20[['20_NO2 (ug/m3)', '20_O3 (ug/m3)',
           '20_NO (ug/m3)', '20_PM2.5 (ug/m3)', '20_PM1 (ug/m3)','20_SO2 (ug/m3)',
           '20_PM10 (ug/m3)', '20_CO (mg/m3)', '20_H2S (ug/m3)',
           '20_Ambient_Temperature (degC)', '20_Relative_Humidity (%)',
           '20_Pressure (hPa)']]
# Accepted [lower, upper] range per df20 variable; readings outside it are set to NaN.
VariableLimits = {
    '20_PM10 (ug/m3)': [0.001, 1000],
    '20_PM1 (ug/m3)': [0.001, 1000],
    '20_PM2.5 (ug/m3)': [0.001, 1000],
    '20_NO2 (ug/m3)': [0.001, 2000],
    '20_NO (ug/m3)': [0.001, 2000],
    # '20_SO2 (ug/m3)':[0.001,2000],
    '20_O3 (ug/m3)': [0.001, 2000],
    '20_CO (mg/m3)': [0.001, 10],
    '20_H2S (ug/m3)': [0.001, 10],
    '20_Pressure (hPa)': [800, 1000],
    '20_Ambient_Temperature (degC)': [-40, 60],
    '20_Relative_Humidity (%)': [0.001, 100.001],
}
for v, (lower20, upper20) in VariableLimits.items():
    print(v)
    out_of_range20 = (df20[v] < lower20) | (df20[v] > upper20)
    df20.loc[out_of_range20, v] = nan
20_PM10 (ug/m3) 20_PM1 (ug/m3) 20_PM2.5 (ug/m3) 20_NO2 (ug/m3) 20_NO (ug/m3) 20_O3 (ug/m3) 20_CO (mg/m3) 20_H2S (ug/m3) 20_Pressure (hPa) 20_Ambient_Temperature (degC) 20_Relative_Humidity (%)
# Mass concentrations -> mixing ratios, added as new columns.
# Gases converted by dividing by a fixed factor.
for target20, source20, divisor20 in (
    ('20_NO2 (ppb)', '20_NO2 (ug/m3)', 1.886),
    #('20_SO2 (ppb)', '20_SO2 (ug/m3)', 2.6178),
    ('20_O3 (ppb)', '20_O3 (ug/m3)', 1.967),
    ('20_CO (ppm)', '20_CO (mg/m3)', 1.153),
):
    df20[target20] = df20[source20] / divisor20
# Gases converted as value * 24.45 / M.
# NOTE(review): 24.45 is presumably the molar volume (L/mol at 25 degC, 1 atm) and M the
# molar mass of NO / H2S; confirm the reference conditions.
for target20, source20, molar_mass20 in (
    ('20_NO (ppb)', '20_NO (ug/m3)', 30.01),
    ('20_H2S (ppb)', '20_H2S (ug/m3)', 34.1),
):
    df20[target20] = df20[source20] * 24.45 / molar_mass20
Quick view
# Average onto a 10-minute grid (df20 was already resampled with the same rule earlier,
# so this is expected to leave the values unchanged).
df20=df20.resample('10min').mean()
# Visual sanity check: one subplot per column.
df20.plot(subplots=True)
array([<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>], dtype=object)
# Save the processed df20 table as CSV, floats written with three decimals.
csv_out20 = Path(TabDir/'df20.csv')
df20.to_csv(csv_out20, float_format='%.03f')
# Folder holding the exports of instrument 21 (Earthsense Zephyr 729-SA).
Zeph21 = Path(DataDir/'21_EarthsenseZephyr_729-SA')
# Show which files are available in that folder.
os.listdir(Zeph21)
['20210830_0927_21_Zepher_729-SA_DK.csv', '20211029_0854_21_Zephyr_729-SA_DK.csv', '20210913_0926_21_Zepher_729-SA_DK.csv', '20210629_0920_21_Zypher_729-SA_VW.csv', '20210719_1026_21_Zypher_729-SA_DK.csv', '20211005_1300_21_Zepher_729-SA_DK.csv', '20210726_1026_21_Zypher_729-SA_AR.csv', '20211025_1437_21_Zepher_729-SA_DK.csv', '20210705_1043_21_Zypher_729-SA_DK.csv', '20210429_1120_21_Zypher_729-SA_VW.csv', '20210810_1014_21_Zypher_729-SA_DK.csv', '20210615_1324_21_Zypher_729-SA_DK.csv', '20210816_1127_21_Zypher_729-SA_DK.csv', '20210712_1205_21_Zypher_729-SA_DK.csv', '20210517_1444_21_Zypher_729-SA_AR.csv', '20210621_1437_21_Zypher_729-SA_VW.csv', '20210728_0920_21_Zypher_729-SA_VW.csv', '20210906_1051_21_Zepher_729-SA_DK.csv', '20210823_1033_21_Zypher_729-SA_DK.csv', '20210524_1100_21_Zypher_729-SA_VW.csv', '20210511_0945_21_Zypher_729-SA_VW.csv', '20210920_1057_21_Zepher_729-SA_DK.csv']
# Peek at one raw export to see its header and layout.
# NOTE(review): head() is not defined in this part of the file; presumably it prints the first lines of the file.
head(Zeph21/'20210830_0927_21_Zepher_729-SA_DK.csv')
Timestamp(UTC),Timestamp(UTS),Timestamp(Local),729- SA-Latitude,729- SA-Longitude,729- SA-Temp(C)-slotA,729- SA-Humidity(%RH)-slotA,729- SA-NO2(ug/m3)-slotA,729- SA-O3(ug/m3)-slotA,729- SA-NO(ug/m3)-slotA,729- SA-SO2(ug/m3)-slotA,729- SA-PM1(ug/m3)-slotA,729- SA-PM2.5(ug/m3)-slotA,729- SA-PM10(ug/m3)-slotA,729- SA-CO(mg/m3)-slotA,729- SA-H2S(ug/m3)-slotA,729- SA-Ambient temp(C)-slotA,729- SA-Ambient humidity(%RH)-slotA,729- SA-Ambient pressure(hPa)-slotA "2021-08-22T22:00:00+00:00",1629669600,"2021-08-23T00:00:00+0200",-26.724585,27.884777,13,71,28.18,35.95,1.28,35.78,17.1,32.64,33.61,0.9,11.52,13,72,861.6 "2021-08-22T22:00:10+00:00",1629669610,"2021-08-23T00:00:10+0200",-26.724585,27.884775,13,71,20.02,0,3.81,37.57,16.39,30.73,31.21,0.57,10.64,13,72,861.6 "2021-08-22T22:00:20+00:00",1629669620,"2021-08-23T00:00:20+0200",-26.724585,27.884775,13,71,25.79,23.25,0,34.33,17.1,31.53,32.48,0.16,10.51,13,72,861.6 "2021-08-22T22:00:30+00:00",1629669630,"2021-08-23T00:00:30+0200",-26.724585,27.884775,13,70,21.61,0.95,4.45,34.55,18.53,31.74,34.43,0.63,8.95,13,72,861.5 "2021-08-22T22:00:40+00:00",1629669640,"2021-08-23T00:00:40+0200",-26.724585,27.884775,13,70,25.17,20.4,1.28,34.41,17.1,30.33,30.7,0,9.05,13,72,861.5 "2021-08-22T22:00:50+00:00",1629669650,"2021-08-23T00:00:50+0200",-26.724585,27.884777,13,70,23.86,0,4.45,33.21,17.1,29.72,32,0.41,9.86,13,72,861.5 "2021-08-22T22:01:00+00:00",1629669660,"2021-08-23T00:01:00+0200",-26.724585,27.884777,13,70,23.86,37.95,1.35,31.94,17.1,31.2,33.25,0.6,9.91,13,72,861.5 "2021-08-22T22:01:10+00:00",1629669670,"2021-08-23T00:01:10+0200",-26.724585,27.884777,13,70,24.5,14.05,0.92,35.66,16.39,30.26,31.3,0.78,10.87,13,72,861.5 "2021-08-22T22:01:20+00:00",1629669680,"2021-08-23T00:01:20+0200",-26.724585,27.884777,13,71,25.57,0,1.71,34.84,16.39,29.32,29.99,0.7,8.93,13,72,861.5
Import data
# Column names applied to every Zephyr 729-SA export, replacing the file's own header row.
Var = ['Timestamp(UTC)',
       'Timestamp(UTS)',
       'Timestamp(Local)',
       '21_Latitude',
       '21_Longitude',
       '21_Internal_Temerature (degC)',
       '21_Internal_Relative_Humidity (%)',
       '21_NO2 (ug/m3)',
       '21_O3 (ug/m3)',
       '21_NO (ug/m3)',
       '21_SO2 (ug/m3)',
       '21_PM1 (ug/m3)',
       '21_PM2.5 (ug/m3)',
       '21_PM10 (ug/m3)',
       '21_CO (mg/m3)',
       '21_H2S (ug/m3)',
       '21_Ambient_Temperature (degC)',
       '21_Relative_Humidity (%)',
       '21_Pressure (hPa)']
files = os.listdir(Zeph21)
df21 = pd.DataFrame()
for n, f in enumerate(files, start=1):
    print("{} of {}: ".format(n,len(files)),end="")
    fullpath = os.path.join(Zeph21,f)
    # Only CSV exports whose name starts with "2021" are read; other entries are skipped.
    if f.endswith(".csv") and f.startswith("2021"):
        print("Reading ",f)
        dftmp = pd.read_csv(fullpath,header=0,names=Var,na_values=["NAN","NaN",'undefined'],parse_dates=[0],index_col=0)
        # Test the accumulator, not the file just read: checking dftmp here would replace
        # everything collected so far with an empty frame whenever an export has no rows.
        if len(df21) == 0:
            df21 = dftmp
        else:
            # Values already collected win; the new file only fills gaps.
            df21 = df21.combine_first(dftmp)
    print("Finished with {}".format(f))
1 of 22: Reading 20210830_0927_21_Zepher_729-SA_DK.csv Finished with 20210830_0927_21_Zepher_729-SA_DK.csv 2 of 22: Reading 20211029_0854_21_Zephyr_729-SA_DK.csv Finished with 20211029_0854_21_Zephyr_729-SA_DK.csv 3 of 22: Reading 20210913_0926_21_Zepher_729-SA_DK.csv Finished with 20210913_0926_21_Zepher_729-SA_DK.csv 4 of 22: Reading 20210629_0920_21_Zypher_729-SA_VW.csv Finished with 20210629_0920_21_Zypher_729-SA_VW.csv 5 of 22: Reading 20210719_1026_21_Zypher_729-SA_DK.csv Finished with 20210719_1026_21_Zypher_729-SA_DK.csv 6 of 22: Reading 20211005_1300_21_Zepher_729-SA_DK.csv Finished with 20211005_1300_21_Zepher_729-SA_DK.csv 7 of 22: Reading 20210726_1026_21_Zypher_729-SA_AR.csv Finished with 20210726_1026_21_Zypher_729-SA_AR.csv 8 of 22: Reading 20211025_1437_21_Zepher_729-SA_DK.csv Finished with 20211025_1437_21_Zepher_729-SA_DK.csv 9 of 22: Reading 20210705_1043_21_Zypher_729-SA_DK.csv Finished with 20210705_1043_21_Zypher_729-SA_DK.csv 10 of 22: Reading 20210429_1120_21_Zypher_729-SA_VW.csv Finished with 20210429_1120_21_Zypher_729-SA_VW.csv 11 of 22: Reading 20210810_1014_21_Zypher_729-SA_DK.csv Finished with 20210810_1014_21_Zypher_729-SA_DK.csv 12 of 22: Reading 20210615_1324_21_Zypher_729-SA_DK.csv Finished with 20210615_1324_21_Zypher_729-SA_DK.csv 13 of 22: Reading 20210816_1127_21_Zypher_729-SA_DK.csv Finished with 20210816_1127_21_Zypher_729-SA_DK.csv 14 of 22: Reading 20210712_1205_21_Zypher_729-SA_DK.csv Finished with 20210712_1205_21_Zypher_729-SA_DK.csv 15 of 22: Reading 20210517_1444_21_Zypher_729-SA_AR.csv Finished with 20210517_1444_21_Zypher_729-SA_AR.csv 16 of 22: Reading 20210621_1437_21_Zypher_729-SA_VW.csv Finished with 20210621_1437_21_Zypher_729-SA_VW.csv 17 of 22: Reading 20210728_0920_21_Zypher_729-SA_VW.csv Finished with 20210728_0920_21_Zypher_729-SA_VW.csv 18 of 22: Reading 20210906_1051_21_Zepher_729-SA_DK.csv Finished with 20210906_1051_21_Zepher_729-SA_DK.csv 19 of 22: Reading 20210823_1033_21_Zypher_729-SA_DK.csv Finished 
with 20210823_1033_21_Zypher_729-SA_DK.csv 20 of 22: Reading 20210524_1100_21_Zypher_729-SA_VW.csv Finished with 20210524_1100_21_Zypher_729-SA_VW.csv 21 of 22: Reading 20210511_0945_21_Zypher_729-SA_VW.csv Finished with 20210511_0945_21_Zypher_729-SA_VW.csv 22 of 22: Reading 20210920_1057_21_Zepher_729-SA_DK.csv Finished with 20210920_1057_21_Zepher_729-SA_DK.csv
# Show the first rows of the merged raw table to check columns and timestamps.
df21.head()
| Timestamp(UTS) | Timestamp(Local) | 21_Latitude | 21_Longitude | 21_Internal_Temerature (degC) | 21_Internal_Relative_Humidity (%) | 21_NO2 (ug/m3) | 21_O3 (ug/m3) | 21_NO (ug/m3) | 21_SO2 (ug/m3) | 21_PM1 (ug/m3) | 21_PM2.5 (ug/m3) | 21_PM10 (ug/m3) | 21_CO (mg/m3) | 21_H2S (ug/m3) | 21_Ambient_Temperature (degC) | 21_Relative_Humidity (%) | 21_Pressure (hPa) | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Timestamp(UTC) | ||||||||||||||||||
| 2021-04-21 11:03:21+00:00 | 1619003001 | 2021-04-21T12:03:21+0100 | NaN | NaN | 33 | 27 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 32 | 24 | 864.6 |
| 2021-04-21 11:03:31+00:00 | 1619003011 | 2021-04-21T12:03:31+0100 | NaN | NaN | 33 | 26 | 0.0 | 0.0 | 0.0 | 95.65 | NaN | NaN | NaN | 31.74 | 152.12 | 32 | 24 | 864.6 |
| 2021-04-21 11:03:41+00:00 | 1619003021 | 2021-04-21T12:03:41+0100 | NaN | NaN | 33 | 26 | 0.0 | 0.0 | 0.0 | 114.27 | 5.70 | 26.50 | 26.50 | 31.74 | 148.14 | 32 | 25 | 864.6 |
| 2021-04-21 11:03:51+00:00 | 1619003031 | 2021-04-21T12:03:51+0100 | -26.724680 | 27.884772 | 33 | 25 | 0.0 | 0.0 | 0.0 | 118.57 | 4.99 | 25.43 | 25.43 | 31.74 | 144.96 | 32 | 25 | 864.6 |
| 2021-04-21 11:04:01+00:00 | 1619003041 | 2021-04-21T12:04:01+0100 | -26.724675 | 27.884790 | 33 | 25 | 0.0 | 0.0 | 0.0 | 116.56 | 6.41 | 30.64 | 30.64 | 31.74 | 141.54 | 31 | 25 | 864.6 |
# Parse the index as UTC timestamps; unparseable entries become NaT.
df21.index=pd.to_datetime(df21.index, utc=True, errors='coerce')
df21.index.name = 'Date'
# Average onto a 10-minute grid. numeric_only=True because the frame still contains the
# text column 'Timestamp(Local)', which cannot be averaged.
df21=df21.resample('10min').mean(numeric_only=True)
# Express the timestamps in South African local time.
df21.index = df21.index.tz_convert('Africa/Johannesburg')
# Keep only the measurement columns used further on.
df21=df21[['21_NO2 (ug/m3)', '21_O3 (ug/m3)',
           '21_NO (ug/m3)', '21_SO2 (ug/m3)', '21_PM1 (ug/m3)', '21_PM2.5 (ug/m3)',
           '21_PM10 (ug/m3)', '21_CO (mg/m3)', '21_H2S (ug/m3)',
           '21_Ambient_Temperature (degC)', '21_Relative_Humidity (%)',
           '21_Pressure (hPa)']]
# Accepted [lower, upper] range per df21 variable; readings outside it are set to NaN.
VariableLimits = {
    '21_PM10 (ug/m3)': [0.001, 10000],
    '21_PM2.5 (ug/m3)': [0.001, 10000],
    '21_NO2 (ug/m3)': [0.001, 2000],
    '21_NO (ug/m3)': [0.001, 2000],
    '21_SO2 (ug/m3)': [0.001, 2000],
    '21_O3 (ug/m3)': [0.001, 2000],
    '21_CO (mg/m3)': [0.001, 10],
    '21_H2S (ug/m3)': [0.001, 10],
    '21_Pressure (hPa)': [800, 1000],
    '21_Ambient_Temperature (degC)': [-40, 60],
    '21_Relative_Humidity (%)': [0.001, 100.001],
}
for v, (lower21, upper21) in VariableLimits.items():
    print(v)
    out_of_range21 = (df21[v] < lower21) | (df21[v] > upper21)
    df21.loc[out_of_range21, v] = nan
21_PM10 (ug/m3) 21_PM2.5 (ug/m3) 21_NO2 (ug/m3) 21_NO (ug/m3) 21_SO2 (ug/m3) 21_O3 (ug/m3) 21_CO (mg/m3) 21_H2S (ug/m3) 21_Pressure (hPa) 21_Ambient_Temperature (degC) 21_Relative_Humidity (%)
# Mass concentrations -> mixing ratios, added as new columns.
# Gases converted by dividing by a fixed factor.
for target21, source21, divisor21 in (
    ('21_NO2 (ppb)', '21_NO2 (ug/m3)', 1.886),
    ('21_SO2 (ppb)', '21_SO2 (ug/m3)', 2.6178),
    ('21_O3 (ppb)', '21_O3 (ug/m3)', 1.967),
    ('21_CO (ppm)', '21_CO (mg/m3)', 1.153),
):
    df21[target21] = df21[source21] / divisor21
# Gases converted as value * 24.45 / M.
# NOTE(review): 24.45 is presumably the molar volume (L/mol at 25 degC, 1 atm) and M the
# molar mass of NO / H2S; confirm the reference conditions.
for target21, source21, molar_mass21 in (
    ('21_NO (ppb)', '21_NO (ug/m3)', 30.01),
    ('21_H2S (ppb)', '21_H2S (ug/m3)', 34.1),
):
    df21[target21] = df21[source21] * 24.45 / molar_mass21
# Visual sanity check: one subplot per df21 column on a 10 x 10 inch figure.
df21.plot(subplots=True, figsize=(10,10))
array([<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
dtype=object)
# Save the processed df21 table as CSV, floats written with three decimals.
csv_out21 = Path(TabDir/'df21.csv')
df21.to_csv(csv_out21, float_format='%.03f')
Import data
# Exports of instrument 22 (Atmos 84CCA8B167D2) to load.
DataFiles = [
    '20210414_0000_22_Atmos_84CCA8B167D2_DK.csv',
    '20210728_0000_22_Atmos_84CCA8B167D2_VW.csv',
    '20210810_0952_22_Atmos_84CCA8B167D2_DK.csv',
    '20210712_1146_22_Atmos_84CCA8B167D2_DK.csv',
    '20210906_1025_22_Atmos_84CCA8B167D2_DK.csv',
    '20210913_0918_22_Atmos_84CCA8B167D2_DK.csv',
    '20210830_0916_22_Atmos_84CCA8B167D2_DK.csv',
    '20210705_1003_22_Atmos_84CCA8B167D2_DK.csv',
    '20210823_1010_22_Atmos_84CCA8B167D2_DK.csv',
    '20210601_0000_22_Atmos_84CCA8B167D2_VW.csv',
    '20210615_0000_22_Atmos_84CCA8B167D2_VW.csv',
    '20210920_1015_22_Atmos_84CCA8B167D2_DK.csv',
    '20210816_1117_22_Atmos_84CCA8B167D2_DK.csv',
    '20211005_1246_22_Atmos_84CCA8B167D2_DK.csv',
    '20211011_0636_22_Atmos_84CCA8B167D2_DK.csv',
    '20211018_1416_22_Atmos_84CCA8B167D2_DK.csv',
    '20211025_1417_22_Atmos_84CCA8B167D2_DK.csv',
    '20211029_0844_22_Atmos_84CCA8B167D2_DK.csv',
]
# Read each export with its 'dt_time' column parsed as the index and merge them;
# values already collected take precedence over later files (combine_first).
df22 = pd.DataFrame()
for File in DataFiles:
    print(File)
    if not File.endswith('.csv'):
        continue
    csv_path22 = Path(DataDir/'22_Atmos_84CCA8B167D2'/File)
    dftmp22 = pd.read_csv(csv_path22, parse_dates=['dt_time'], index_col=['dt_time'])
    df22 = dftmp22 if len(df22) == 0 else df22.combine_first(dftmp22)
df22.index.name = 'Date'
20210414_0000_22_Atmos_84CCA8B167D2_DK.csv 20210728_0000_22_Atmos_84CCA8B167D2_VW.csv 20210810_0952_22_Atmos_84CCA8B167D2_DK.csv 20210712_1146_22_Atmos_84CCA8B167D2_DK.csv 20210906_1025_22_Atmos_84CCA8B167D2_DK.csv 20210913_0918_22_Atmos_84CCA8B167D2_DK.csv 20210830_0916_22_Atmos_84CCA8B167D2_DK.csv 20210705_1003_22_Atmos_84CCA8B167D2_DK.csv 20210823_1010_22_Atmos_84CCA8B167D2_DK.csv 20210601_0000_22_Atmos_84CCA8B167D2_VW.csv 20210615_0000_22_Atmos_84CCA8B167D2_VW.csv 20210920_1015_22_Atmos_84CCA8B167D2_DK.csv 20210816_1117_22_Atmos_84CCA8B167D2_DK.csv 20211005_1246_22_Atmos_84CCA8B167D2_DK.csv 20211011_0636_22_Atmos_84CCA8B167D2_DK.csv 20211018_1416_22_Atmos_84CCA8B167D2_DK.csv 20211025_1417_22_Atmos_84CCA8B167D2_DK.csv 20211029_0844_22_Atmos_84CCA8B167D2_DK.csv
# Mark the naive timestamps as South African local time.
df22.index = df22.index.tz_localize('Africa/Johannesburg')
# Map the raw Atmos column names onto the naming scheme shared by the other instruments.
names22 = {
    'rh': '22_Relative_Humidity (%)',
    'temp': '22_Ambient_Temperature (degC)',
    'pm1cnc': '22_PM1 (ug/m3)',
    'pm2.5cnc': '22_PM2.5 (ug/m3)',
    'pm10cnc': '22_PM10 (ug/m3)',
}
kept22 = ["22_PM1 (ug/m3)", "22_PM2.5 (ug/m3)", "22_PM10 (ug/m3)",
          "22_Relative_Humidity (%)", "22_Ambient_Temperature (degC)"]
df22 = df22.rename(columns=names22)[kept22]
# Average onto a 5-minute grid, then keep data from 2021-04-26 onwards.
df22 = df22.resample('5min').mean().loc['2021-04-26':]
# Visual sanity check: one subplot per column.
df22.plot(subplots=True)
array([<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>], dtype=object)
# Save the processed df22 table as CSV, floats written with three decimals.
csv_out22 = Path(TabDir/'df22.csv')
df22.to_csv(csv_out22, float_format='%.03f')
Import data
# Exports of instrument 23 (Atmos 98F4ABDCA328) to load.
DataFiles = [
    '20210802_0000_23_Atmos_98F4ABDCA328_DK.csv',
    '20210414_0000_23_Atmos_98F4ABDCA328_DK.csv',
    '20210823_1012_23_Atmos_98F4ABDCA328_DK.csv',
    '20210719_1020_23_Atmos_98F4ABDCA328_DK.csv',
    '20210601_0000_23_Atmos_98F4ABDCA328_VW.csv',
    '20210810_0954_23_Atmos_98F4ABDCA328_DK.csv',
    '20210705_1003_23_Atmos_98F4ABDCA328_DK.csv',
    '20210816_1118_23_Atmos_98F4ABDCA328_DK.csv',
    '20210830_0917_23_Atmos_98F4ABDCA328_DK.csv',
    '20210712_1147_23_Atmos_98F4ABDCA328_DK.csv',
    '20210913_0920_23_Atmos_98F4ABDCA328_DK.csv',
    '20210920_1017_23_Atmos_98F4ABDCA328_DK.csv',
    '20210615_0000_23_Atmos_98F4ABDCA328_VW.csv',
    '20210906_1028_23_Atmos_98F4ABDCA328_DK.csv',
    '20211025_1422_23_Atmos_98F4ABDCA328_DK.csv',
    '20211005_1247_23_Atmos_98F4ABDCA328_VW.csv',
    '20211018_1421_23_Atmos_98F4ABDCA328_DK.csv',
    '20211011_0637_23_Atmos_98F4ABDCA328_DK.csv',
    '20211029_0847_23_Atmos_98F4ABDCA328_DK.csv',
]
# Read each export with its 'dt_time' column parsed as the index and merge them;
# values already collected take precedence over later files (combine_first).
df23 = pd.DataFrame()
for File in DataFiles:
    print(File)
    if not File.endswith('.csv'):
        continue
    csv_path23 = Path(DataDir/'23_Atmos_98F4ABDCA328'/File)
    dftmp23 = pd.read_csv(csv_path23, parse_dates=['dt_time'], index_col=['dt_time'])
    df23 = dftmp23 if len(df23) == 0 else df23.combine_first(dftmp23)
df23.index.name = 'Date'
20210802_0000_23_Atmos_98F4ABDCA328_DK.csv 20210414_0000_23_Atmos_98F4ABDCA328_DK.csv 20210823_1012_23_Atmos_98F4ABDCA328_DK.csv 20210719_1020_23_Atmos_98F4ABDCA328_DK.csv 20210601_0000_23_Atmos_98F4ABDCA328_VW.csv 20210810_0954_23_Atmos_98F4ABDCA328_DK.csv 20210705_1003_23_Atmos_98F4ABDCA328_DK.csv 20210816_1118_23_Atmos_98F4ABDCA328_DK.csv 20210830_0917_23_Atmos_98F4ABDCA328_DK.csv 20210712_1147_23_Atmos_98F4ABDCA328_DK.csv 20210913_0920_23_Atmos_98F4ABDCA328_DK.csv 20210920_1017_23_Atmos_98F4ABDCA328_DK.csv 20210615_0000_23_Atmos_98F4ABDCA328_VW.csv 20210906_1028_23_Atmos_98F4ABDCA328_DK.csv 20211025_1422_23_Atmos_98F4ABDCA328_DK.csv 20211005_1247_23_Atmos_98F4ABDCA328_VW.csv 20211018_1421_23_Atmos_98F4ABDCA328_DK.csv 20211011_0637_23_Atmos_98F4ABDCA328_DK.csv 20211029_0847_23_Atmos_98F4ABDCA328_DK.csv
# Map the raw Atmos column names onto the naming scheme shared by the other instruments.
names23 = {
    'rh': '23_Relative_Humidity (%)',
    'temp': '23_Ambient_Temperature (degC)',
    'pm1cnc': '23_PM1 (ug/m3)',
    'pm2.5cnc': '23_PM2.5 (ug/m3)',
    'pm10cnc': '23_PM10 (ug/m3)',
}
kept23 = ["23_PM1 (ug/m3)", "23_PM2.5 (ug/m3)", "23_PM10 (ug/m3)",
          "23_Relative_Humidity (%)", "23_Ambient_Temperature (degC)"]
df23 = df23.rename(columns=names23)[kept23]
# Mark the naive timestamps as South African local time.
df23.index = df23.index.tz_localize('Africa/Johannesburg')
# Keep data from 2021-04-26 onwards, then average onto a 5-minute grid.
df23 = df23.loc['2021-04-26':].resample('5min').mean()
# Visual sanity check: one subplot per column.
df23.plot(subplots=True)
array([<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>], dtype=object)
# Save the processed df23 table as CSV, floats written with three decimals.
csv_out23 = Path(TabDir/'df23.csv')
df23.to_csv(csv_out23, float_format='%.03f')
Import data
# List every entry in the Dylos (instrument 24) data folder.
DataFiles= os.listdir(DataDir/'24_Dylos_'/'')
# Number of entries found (all directory entries, not only .txt data files).
len(DataFiles)
14
# Timestamp parser used when reading the Dylos text exports.
def custom_date_parser(time):
    """Parse a ``"%m/%d/%y %H:%M"`` timestamp string.

    Parameters
    ----------
    time : str
        Timestamp text, e.g. ``"07/19/21 11:24"``.

    Returns
    -------
    datetime.datetime or float
        The parsed (timezone-naive) datetime, or ``np.nan`` when the input is
        not a string or does not match the format.
    """
    try:
        return datetime.datetime.strptime(time, "%m/%d/%y %H:%M")
    except (ValueError, TypeError):
        # ValueError: text that is not a timestamp; TypeError: non-string input
        # (e.g. a missing value or a whole array). Anything else is a real error
        # and is allowed to propagate.
        return np.nan
# Column names, in file order (the first column becomes the index).
Vars = ['date',
        'PM_small',
        'PM_large']
# Read every Dylos .txt export, clean it, average to 1 minute and merge the files.
df24 = pd.DataFrame()
for File in DataFiles:
    print(File)
    if not File.endswith('.txt'):
        continue
    df24tmp = pd.read_csv(Path(DataDir/'24_Dylos_'/File), skiprows = 8, index_col=0, names=Vars)
    # Parse the timestamps explicitly (read_csv's date_parser option is deprecated);
    # entries that do not match the format become NaT.
    df24tmp.index = pd.to_datetime(df24tmp.index, format="%m/%d/%y %H:%M", errors='coerce')
    # Remove lines with a bad date index
    df24tmp = df24tmp.loc[~df24tmp.index.isnull()]
    # Remove lines where all data is missing
    df24tmp = df24tmp.dropna(axis=0, how='all')
    # Make sure that the variables are numeric: a non-numeric entry becomes NaN instead of
    # leaving the whole column as text, which could not be averaged.
    df24tmp = df24tmp.apply(pd.to_numeric, errors='coerce')
    # 1-minute means, restricted to 2021.
    df24tmp = df24tmp.resample('1min').mean().loc['2021-01-01':'2022-01-01']
    if len(df24) == 0:
        df24 = df24tmp
    else:
        # Values already collected win; the new file only fills gaps.
        df24 = df24.combine_first(df24tmp)
20210719_1124_24_Dylos_DC1700-SA_DK.txt 20211005_1012_01_Dylos_DC1700-SA_DK.txt 20210810_1010_01_Dylos_DC1700-SA_VW.txt 20211005_1012_01_Dylos_DC1700-SA_DK.docx 20210823_1130_01_Dylos_DC1700-SA_VW.txt 20210906_1007_01_Dylos_DC1700-SA_DK.txt 20210802_1120_24_Dylos_DC1700-SA_VW.txt 20210920_1056_01_Dylos_DC1700-SA_DK.txt 20210621_0920_24_Dylos_DC1700-SA_VW.txt 20210816_1052_01_Dylos_DC1700-SA_DK.txt 20210913_0949_01_Dylos_DC1700-SA_DK.txt 20210830_0949_01_Dylos_DC1700-SA_DK.txt 20210705_1220_24_Dylos_DC1700-SA_DK.txt 20210726_0920_24_Dylos_DC1700-SA_VW.txt
# Finalise the Dylos table: name the index, order it in time and average to 5 minutes.
df24.index.name = 'timestamp'
df24 = df24.sort_index().resample('5min').mean()
# NOTE(review): 0.028316846592 is the number of cubic metres in one cubic
# foot; confirm that multiplying the counts by it is the intended conversion.
convert = 0.028316846592
df24[['PM_small', 'PM_large']] = df24[['PM_small', 'PM_large']] * convert
# Label the naive timestamps as South African local time.
df24.index = df24.index.tz_localize('Africa/Johannesburg')
df24.plot(subplots=True)
array([<AxesSubplot:xlabel='timestamp'>, <AxesSubplot:xlabel='timestamp'>],
dtype=object)
# Persist the Dylos table (three decimals) in the table directory.
df24.to_csv(TabDir / 'df24.csv', float_format="%.3f")
# Folder holding the raw exports of the Zephyr unit (instrument 26), and its contents.
Zeph26 = Path(DataDir) / '26_EarthsenseZephyr_Z00533'
os.listdir(Zeph26)
['20210823_1023_25_Zephyr_533_DK.csv', '20211029_0850_25_Zephyr_533_DK.csv', '20210830_0931_25_Zephyr_533_DK.csv', '20210913_0931_25_Zephyr_533_DK.csv', '20210803_1017_25_Zypher_533_VW.csv', '20210811_0752_25_Zephyr_533_DK.csv', '20210816_1139_25_Zephyr_533_DK.csv', '20211005_1305_25_Zephyr_533_DK.csv', '20210920_1025_25_Zephyr_533_DK.csv', '20211025_1452_25_Zephyr_533_DK.csv', '20210906_1042_25_Zephyr_533_DK.csv']
# Preview the start of one raw Zephyr export to check its header and layout.
# NOTE(review): `head` is a helper defined elsewhere in the notebook;
# presumably it prints the first lines of the file - confirm.
head(Zeph26/'20210823_1023_25_Zephyr_533_DK.csv')
Timestamp(UTC),Timestamp(UTS),Timestamp(Local),533-Latitude,533-Longitude,533-Temp(C)-slotB,533-Humidity(%RH)-slotB,533-NO2(ug/m3)-slotB,533-O3(ug/m3)-slotB,533-NO(ug/m3)-slotB,533-SO2(ug/m3)-slotB,533-PM1(ug/m3)-slotB,533-PM2.5(ug/m3)-slotB,533-PM10(ug/m3)-slotB,533-CO(mg/m3)-slotB,533-H2S(ug/m3)-slotB,533-Ambient temp(C)-slotB,533-Ambient humidity(%RH)-slotB,533-Ambient pressure(hPa)-slotB "2021-08-15T22:00:01+00:00",1629064801,"2021-08-16T00:00:01+0200",-26.72463,27.884757,8,87,21.7,0,0,4.38,26.36,26.36,41.03,10.32,7.26,-129,100,743.5 "2021-08-15T22:00:11+00:00",1629064811,"2021-08-16T00:00:11+0200",-26.724632,27.884753,8,87,22.65,0,0,4.22,24.94,26.37,42.08,9.83,6.64,-129,100,743.5 "2021-08-15T22:00:21+00:00",1629064821,"2021-08-16T00:00:21+0200",-26.724633,27.884752,8,87,22.58,0,0,4.18,25.65,25.65,41.03,9.73,6.75,-129,100,743.5 "2021-08-15T22:00:31+00:00",1629064831,"2021-08-16T00:00:31+0200",-26.724633,27.884752,8,88,22.19,0,0,4.33,24.94,25,40.47,10.23,6.85,-129,100,743.5 "2021-08-15T22:00:41+00:00",1629064841,"2021-08-16T00:00:41+0200",-26.724633,27.884752,8,88,22.35,0,0,4.2,24.94,24.94,39.43,10.13,7.49,-129,100,743.5 "2021-08-15T22:00:51+00:00",1629064851,"2021-08-16T00:00:51+0200",-26.724632,27.884752,8,88,19.69,0,0,3.72,25.65,25.9,41.52,10.72,7.23,-129,100,743.5 "2021-08-15T22:01:01+00:00",1629064861,"2021-08-16T00:01:01+0200",-26.724632,27.884752,8,88,16.81,0,0,4.29,24.94,25.9,42.56,10.29,6.37,-129,100,743.5 "2021-08-15T22:01:11+00:00",1629064871,"2021-08-16T00:01:11+0200",-26.724632,27.884752,8,88,14.81,0,9.46,3.78,25.65,25.9,40.99,9.99,5.63,-129,100,743.5 "2021-08-15T22:01:21+00:00",1629064881,"2021-08-16T00:01:21+0200",-26.72463,27.884752,8,88,15.92,0,7.34,4,25.65,25.9,40.99,10.28,6.29,-129,100,743.5
Import data
# Column names applied to the Zephyr CSV exports (passed as names= to
# pd.read_csv below), listed in the same order as the raw file header:
# three timestamp columns, position, then the measured variables.
# NOTE(review): 'Temerature' in the internal-temperature name looks like a
# typo for 'Temperature'; that column is not selected further down, so it is
# left untouched here.
Var = ['Timestamp(UTC)',
'Timestamp(UTS)',
'Timestamp(Local)',
'26_Latitude',
'26_Longitude',
'26_Internal_Temerature (degC)',
'26_Internal_Relative_Humidity (%)',
'26_NO2 (ug/m3)',
'26_O3 (ug/m3)',
'26_NO (ug/m3)',
'26_SO2 (ug/m3)',
'26_PM1 (ug/m3)',
'26_PM2.5 (ug/m3)',
'26_PM10 (ug/m3)',
'26_CO (mg/m3)',
'26_H2S (ug/m3)',
'26_Ambient_Temperature (degC)',
'26_Relative_Humidity (%)',
'26_Pressure (hPa)']
# Read every 2021 Zephyr CSV export in the Zeph26 folder and merge them into
# a single frame, df26, indexed by the first (UTC timestamp) column.
files = os.listdir(Zeph26)
df26 = pd.DataFrame()
for n, f in enumerate(files, start=1):
    # Progress prefix; the messages below complete the line.
    print("{} of {}: ".format(n, len(files)), end="")
    fullpath = os.path.join(Zeph26, f)
    if f.endswith(".csv") and f.startswith("2021"):
        print("Reading ", f)
        dftmp = pd.read_csv(
            fullpath,
            header=0,          # discard the file's own header row ...
            names=Var,         # ... and use the harmonised names instead
            na_values=["NAN", "NaN", 'undefined'],
            index_col=0,
            parse_dates=[0],
        )
        # Seed the accumulator with the first file, then let later files
        # fill gaps only. The check must be on df26 (the accumulator): the
        # previous check on dftmp replaced everything collected so far with
        # an empty frame whenever an empty file was encountered.
        if len(df26) == 0:
            df26 = dftmp
        else:
            df26 = df26.combine_first(dftmp)
        print("Finished with {}".format(f))
1 of 11: Reading 20210823_1023_25_Zephyr_533_DK.csv Finished with 20210823_1023_25_Zephyr_533_DK.csv 2 of 11: Reading 20211029_0850_25_Zephyr_533_DK.csv Finished with 20211029_0850_25_Zephyr_533_DK.csv 3 of 11: Reading 20210830_0931_25_Zephyr_533_DK.csv Finished with 20210830_0931_25_Zephyr_533_DK.csv 4 of 11: Reading 20210913_0931_25_Zephyr_533_DK.csv Finished with 20210913_0931_25_Zephyr_533_DK.csv 5 of 11: Reading 20210803_1017_25_Zypher_533_VW.csv Finished with 20210803_1017_25_Zypher_533_VW.csv 6 of 11: Reading 20210811_0752_25_Zephyr_533_DK.csv Finished with 20210811_0752_25_Zephyr_533_DK.csv 7 of 11: Reading 20210816_1139_25_Zephyr_533_DK.csv Finished with 20210816_1139_25_Zephyr_533_DK.csv 8 of 11: Reading 20211005_1305_25_Zephyr_533_DK.csv Finished with 20211005_1305_25_Zephyr_533_DK.csv 9 of 11: Reading 20210920_1025_25_Zephyr_533_DK.csv Finished with 20210920_1025_25_Zephyr_533_DK.csv 10 of 11: Reading 20211025_1452_25_Zephyr_533_DK.csv Finished with 20211025_1452_25_Zephyr_533_DK.csv 11 of 11: Reading 20210906_1042_25_Zephyr_533_DK.csv Finished with 20210906_1042_25_Zephyr_533_DK.csv
# Make sure the index is a proper datetime index (unparseable entries become NaT).
df26.index = pd.to_datetime(df26.index, errors='coerce')
df26.index.name = 'Date'
# Average onto a 5-minute grid and express the timestamps in South African time.
df26 = df26.resample('5min').mean()
df26.index = df26.index.tz_convert('Africa/Johannesburg')
# Keep the gas, PM and ambient-meteorology columns only.
df26 = df26.loc[:, [
    '26_NO2 (ug/m3)',
    '26_O3 (ug/m3)',
    '26_NO (ug/m3)',
    '26_SO2 (ug/m3)',
    '26_PM1 (ug/m3)',
    '26_PM2.5 (ug/m3)',
    '26_PM10 (ug/m3)',
    '26_CO (mg/m3)',
    '26_H2S (ug/m3)',
    '26_Ambient_Temperature (degC)',
    '26_Relative_Humidity (%)',
    '26_Pressure (hPa)',
]]
# Accepted [minimum, maximum] per column; values outside the interval are
# replaced with NaN below.
VariableLimits = {
    '26_PM10 (ug/m3)': [0.001, 10000],
    '26_PM2.5 (ug/m3)': [0.001, 10000],
    '26_NO2 (ug/m3)': [0.001, 2000],
    '26_NO (ug/m3)': [0.001, 2000],
    '26_SO2 (ug/m3)': [0.001, 2000],
    '26_O3 (ug/m3)': [0.001, 2000],
    '26_CO (mg/m3)': [0.001, 10],
    '26_H2S (ug/m3)': [0.001, 10],
    '26_Pressure (hPa)': [800, 1000],
    '26_Ambient_Temperature (degC)': [-40, 60],
    '26_Relative_Humidity (%)': [0.001, 100.001],
}
for v, (lower, upper) in VariableLimits.items():
    print(v)
    out_of_range = (df26[v] < lower) | (df26[v] > upper)
    df26.loc[out_of_range, v] = nan
26_PM10 (ug/m3) 26_PM2.5 (ug/m3) 26_NO2 (ug/m3) 26_NO (ug/m3) 26_SO2 (ug/m3) 26_O3 (ug/m3) 26_CO (mg/m3) 26_H2S (ug/m3) 26_Pressure (hPa) 26_Ambient_Temperature (degC) 26_Relative_Humidity (%)
# Derive mixing-ratio columns from the mass-concentration columns.
# First group: divide by a fixed factor per gas.
for target, source, divisor in (
    ('26_NO2 (ppb)', '26_NO2 (ug/m3)', 1.886),
    ('26_SO2 (ppb)', '26_SO2 (ug/m3)', 2.6178),
    ('26_O3 (ppb)', '26_O3 (ug/m3)', 1.967),
    ('26_CO (ppm)', '26_CO (mg/m3)', 1.153),
):
    df26[target] = df26[source] / divisor
# Second group: multiply by 24.45 and divide by a per-gas constant.
# NOTE(review): 24.45 / 30.01 and 24.45 / 34.1 look like molar volume over
# molar mass for NO and H2S - confirm.
df26['26_NO (ppb)'] = df26['26_NO (ug/m3)'] * 24.45 / 30.01
df26['26_H2S (ppb)'] = df26['26_H2S (ug/m3)'] * 24.45 / 34.1
Quick plot
# Visual sanity check: draw every df26 column in its own panel.
df26.plot(subplots=True, figsize=(10,10))
array([<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
dtype=object)
Save
# Persist the instrument-26 table (three decimals) in the table directory.
df26.to_csv(TabDir / 'df26.csv', float_format='%.03f')
Import data
# Explicit list of the Polludrone (instrument 27) CSV exports that are read
# and merged into df27 below.
DataFiles = ['20210830_0922_27_Polludrone_PM01P0007S_DK.csv',
'20210906_1038_27_Polludrone_PM01P0007S_DK.csv',
'20210810_1000_27_Polludrone_PM01P0007S_DK.csv',
'20210816_1119_27_Polludrone_PM01P0007S_DK.csv',
'20211029_0849_27_Polludrone_PM01P0007S_DK.csv',
'20210823_1031_27_Polludrone_PM01P0007S_DK.csv',
'20210913_0924_27_Polludrone_PM01P0007S_DK.csv',
'20211025_1427_27_Polludrone_PM01P0007S_DK.csv',
'20211005_1255_27_Polludrone_PM01P0007S_DK.csv',
'20210920_1027_27_Polludrone_PM01P0007S_DK.csv']
# Column names assigned, in file order, to the Polludrone CSV exports when
# they are read with pd.read_csv(..., names=Vars) below; "Date" is the index.
# NOTE(review): "Raim (mm)" looks like a typo for "Rain (mm)" and, unlike the
# other columns, has no "27_" prefix; it is not selected later in this
# section, so it is left unchanged.
Vars=["Date",
"27_HAQI",
"27_Battery (%)",
"27_CO2 (ppm)",
"27_CO (mg/m3)",
"27_NO2 (ug/m3)",
"27_O3 (ug/m3)",
"27_NO (ppb)",
"27_SO2 (ug/m3)",
"27_Relative_Humidity (%)",
"27_Leq (dB)",
"27_Light (Lux)",
"27_Lmax (dB)",
"27_Lmin (dB)",
"27_PM2.5 (ug/m3)",
"27_PM10 (ug/m3)",
"Raim (mm)",
"27_Ambient_Temperature (degC)",
"27_UV (Index)"]
# Read each listed Polludrone CSV (day-first dates, own header row skipped)
# and merge them into df27; later files only fill gaps left by earlier ones.
df27 = pd.DataFrame()
for File in DataFiles:
    print(File)
    if not File.endswith('.csv'):
        continue
    dftmp27 = pd.read_csv(
        Path(DataDir/'27_OizomePolludrone_PM01P007'/File),
        skiprows=1,
        names=Vars,
        index_col=0,
        parse_dates=[0],
        dayfirst=True,
    )
    df27 = df27.combine_first(dftmp27) if len(df27) else dftmp27
df27.index.name = 'Date'
20210830_0922_27_Polludrone_PM01P0007S_DK.csv 20210906_1038_27_Polludrone_PM01P0007S_DK.csv 20210810_1000_27_Polludrone_PM01P0007S_DK.csv 20210816_1119_27_Polludrone_PM01P0007S_DK.csv 20211029_0849_27_Polludrone_PM01P0007S_DK.csv 20210823_1031_27_Polludrone_PM01P0007S_DK.csv 20210913_0924_27_Polludrone_PM01P0007S_DK.csv 20211025_1427_27_Polludrone_PM01P0007S_DK.csv 20211005_1255_27_Polludrone_PM01P0007S_DK.csv 20210920_1027_27_Polludrone_PM01P0007S_DK.csv
# Label the naive timestamps as South African local time and order them.
df27.index = df27.index.tz_localize("Africa/Johannesburg")
df27 = df27.sort_index()

# Derive mixing-ratio columns from the mass-concentration columns.
for target, source, divisor in (
    ('27_NO2 (ppb)', '27_NO2 (ug/m3)', 1.886),
    ('27_SO2 (ppb)', '27_SO2 (ug/m3)', 2.6178),
    ('27_O3 (ppb)', '27_O3 (ug/m3)', 1.967),
    ('27_CO (ppm)', '27_CO (mg/m3)', 1.153),
):
    df27[target] = df27[source] / divisor

# Replace values outside the accepted (minimum, maximum) range with NaN.
for column, lower, upper in (
    ('27_PM10 (ug/m3)', 0.001, 1000),
    ('27_PM2.5 (ug/m3)', 0.001, 1000),
    ('27_NO2 (ppb)', 0.001, 2000),
    ('27_SO2 (ppb)', 0.001, 2000),
    ('27_O3 (ppb)', 0.001, 2000),
    ('27_CO (ppm)', 0.001, 1000),
    ('27_Ambient_Temperature (degC)', -20, 85),
    ('27_Relative_Humidity (%)', 0.001, 100),
):
    invalid = (df27[column] < lower) | (df27[column] > upper)
    df27.loc[invalid, column] = nan
Quick Plot
# Columns of instrument 27 that are carried forward.
var27 = [
    "27_CO2 (ppm)",
    "27_CO (ppm)",
    "27_NO2 (ppb)",
    "27_O3 (ppb)",
    "27_NO (ppb)",
    "27_SO2 (ppb)",
    "27_Relative_Humidity (%)",
    "27_PM2.5 (ug/m3)",
    "27_PM10 (ug/m3)",
    "27_Ambient_Temperature (degC)",
]
# Keep those columns, average to 10 minutes and plot one panel per column.
df27 = df27[var27].resample('10min').mean()
df27.plot(subplots=True)
array([<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>,
<AxesSubplot:xlabel='Date'>, <AxesSubplot:xlabel='Date'>],
dtype=object)
Save
# Persist the instrument-27 table (three decimals) in the table directory.
df27.to_csv(TabDir / 'df27.csv', float_format="%.3f")
Here all the data from the individual instruments are combined into a single dataset
# Human-readable labels of the instruments, each ending in the name of the
# per-instrument frame (df0 ... df27). The two RAMP entries (24, 25) are
# commented out and there is no entry for 26.
# NOTE(review): this list is not referenced in the cells that follow here -
# confirm whether it is still needed.
InstrumentNames=["00_Reference_SAWS_df0",
"01_ES642_U16486_df1",
"02_ES642_U16489_df2",
"03_ARISense_SN000-57_df3",
"04_ARISense_SN000-59_df4",
"05_Vaisala_S1830003_df5",
"06_S500_5002-2D82-001_df6",
"07_S500_ECM-1906191-003_df7",
"08_PolludroneSmart_EA01P0001_df8",
"09_SimplicityV1_CCSENV011_df9",
"10_SimplicityV1_CCSENV020_df10",
"11_SimplicityV2_IMTAQS0001_df11",
"12_SimplicityV2_IMTAQS0002_df12",
"13_SimplicityV2_IMTAQS0003_df13",
"14_SimplicityV2_IMTAQS0004_df14",
"15_ICOMSMART_20149_df15",
"16_PolludroneSmart_xxx_df16",
"17_GM-5000_CM21035290_df17",
"18_Plantower_xxx_df18",
"19_Plantower_xxx_df19",
"20_Zephyr_642-SA_df20",
"21_Zephyr_729-SA_df21",
"22_Atmos_84CCA8B167D2_df22",
"23_Atmos_98F4ABDCA328_df23",
#"24_RAMP_xxx_df24",
#"25_RAMP_xxx_df25",
"27_PolludroneSmart_PM01P0007_df27"]
# Build the combined dataset: read every saved per-instrument table
# ("df*.csv", excluding any "level" product) from TabDir, average each to
# 10 minutes and join them column-wise on their timestamps.
Files = [f for f in sorted(os.listdir(TabDir)) if f.startswith("df") and 'level' not in f]
frames = []
for f in Files:
    print(f)
    dftmp = pd.read_csv(TabDir/f, parse_dates=[0], index_col=0)
    dftmp = dftmp.resample('10min').mean()
    frames.append(dftmp)
# Concatenate once rather than inside the loop: concatenating onto a growing
# frame copies all previously collected columns on every pass.
# pd.concat raises on an empty list, so fall back to an empty frame.
df = pd.concat(frames, axis=1) if frames else pd.DataFrame()
# Restrict to the campaign period.
df = df.loc['2021-04-26':'2021-10-29']
df0.csv df1.csv df10.csv df11.csv df12.csv df13.csv df14.csv df15.csv df17.csv df18.csv df2.csv df20.csv df21.csv df22.csv df23.csv df24.csv df26.csv df27.csv df3.csv df4.csv df5.csv df6.csv df7.csv df8.csv df9.csv
#df.to_csv(Path(TabDir / 'LCS2021_level1.csv'))
#df.to_csv('DataOfSensors/LCS2021_level1.csv')
Actions to take before producing Level 2 data. Flags for QC are defined as follows:
# Reports
# Fetch the instrument-failure reports; `Reports` is iterated row by row in
# the masking loop further down.
# NOTE(review): `readSheet` is defined elsewhere in the notebook; presumably
# it reads the Google Sheet and returns it in two forms - confirm.
reports, Reports = readSheet()
# Lookup from variable name to its unit suffix.
# NOTE(review): several keys appear more than once. In a Python dict literal
# the LAST value for a repeated key wins, so the effective entries are:
#   Wind_Direction -> "(m/s)"  (listed three times; "(m/s)" looks wrong for a direction)
#   CO             -> "(ppm)"
#   H2S, NO2, NO, O3, SO2 -> "(ug/m3)"
# "Winds_Speed" with "(degrees)" also looks like a mistyped entry. The
# intended units cannot be determined from this section, so nothing is
# changed here - confirm and de-duplicate.
Units={"Ambient_Relative_Humidity":"(%)",
"Humidity":"(%)",
"Internal_Battery":"(%)",
"Internal_Relative_Humidity":"(%)",
"Relative_Humidity":"(%)",
"Internal_Temp":"(C)",
"Noise":"(dB)",
"Wind_Direction":"(deg)",  # overwritten below
"Ambient_Temperature":"(degC)",
"Internal_Temperature":"(degC)",
"Sensor_Temperature":"(degC)",
"Temperature":"(degC)",
"Wind_Direction":"(degrees)",  # duplicate key; overwritten below
"Winds_Speed":"(degrees)",  # NOTE(review): key/unit look mismatched
"Atmospheric_Pressure":"(hPa)",
"Pressure":"(hPa)",
"Flow":"(l/m)",
"CO":"(mg/m3)",  # overwritten below
"Rain":"(mm)",
"Wind_Direction":"(m/s)",  # duplicate key; this value is the one that survives
"Wind_Speed":"(m/s)",
"H2S":"(ppb)",  # overwritten below
"NO2":"(ppb)",  # overwritten below
"NO":"(ppb)",  # overwritten below
"NOx":"(ppb)",
"O3":"(ppb)",  # overwritten below
"SO2":"(ppb)",  # overwritten below
"TVOC":"(ppb)",
"CO2":"(ppm)",
"CO":"(ppm)",  # duplicate key; this value survives
"eCO2":"(ppm)",
"H2S":"(ug/m3)",  # duplicate key; this value survives
"NO2":"(ug/m3)",  # duplicate key; this value survives
"NO":"(ug/m3)",  # duplicate key; this value survives
"O3":"(ug/m3)",  # duplicate key; this value survives
"PM10":"(ug/m3)",
"PM1":"(ug/m3)",
"PM2.5":"(ug/m3)",
"SO2":"(ug/m3)",  # duplicate key; this value survives
"Line_Voltage":"(V)",
"Solar_Radiation":"(W/m2)"
}
# Blank out (set to NaN) the data covered by each reported instrument failure.
# Column headings of the report sheet; constant, so defined once up front.
ins = 'What is the instrument?'
var = 'Which instrument variable failed?'
sdate = 'When did the instrument start failing?'
stime = 'What time did the instrument start to fail?'
edate = 'When was the instrument failure fixed?'
etime = 'What time was the instrument failure fixed?'
for index, row in Reports.iterrows():
    # Start and end of the failure, interpreted as South African local time.
    S = pd.to_datetime("{} {}".format(row[sdate], row[stime])).tz_localize('Africa/Johannesburg')
    E = pd.to_datetime("{} {}".format(row[edate], row[etime])).tz_localize('Africa/Johannesburg')
    # Instrument id such as "df5" becomes the two-digit column prefix "05_".
    prefix = "{:02.0f}_".format(int(row[ins].replace("df", "")))
    v = "{}{}".format(prefix, row[var])
    if str(row[var]).strip().lower() == 'all':
        # A report for "All" variables used to be skipped with
        # "<id>_All not in dataset"; it now masks every column of that instrument.
        targets = [c for c in df.columns if str(c).startswith(prefix)]
    elif v in df.columns:
        targets = [v]
    else:
        targets = []
    if targets:
        print("Correcting ", v)
        df.loc[(df.index >= S) & (df.index <= E), targets] = nan
    else:
        print("{} not in dataset".format(v))
Correcting 00_SO2 (ppb) 05_All not in dataset 05_All not in dataset Correcting 06_PM2.5 (ug/m3) Correcting 00_Horiba_SO2 (ppb) Correcting 05_PM2.5 (ug/m3) Correcting 05_PM10 (ug/m3) Correcting 00_PM10 (ug/m3) Correcting 00_PM10 (ug/m3) Correcting 00_NO (ppb) Correcting 00_NO2 (ppb) Correcting 00_NOx (ppb) Correcting 00_NO (ppb) Correcting 00_NO2 (ppb) Correcting 00_NOx (ppb) Correcting 03_NO (ppb) Correcting 00_Horiba_SO2 (ppb) Correcting 00_SO2 (ppb) Correcting 00_SO2 (ppb) Correcting 00_SO2 (ppb) Correcting 00_SO2 (ppb) Correcting 00_SO2 (ppb) Correcting 00_SO2 (ppb) Correcting 00_SO2 (ppb)
# Hourly means of every PM10 column: plot them together (legend removed to
# keep the figure readable) and show the table itself.
pm10_hourly = df[[i for i in df.columns if 'PM10' in i]].resample('1H').mean()
ax = pm10_hourly.plot()
ax.get_legend().remove()
pm10_hourly
| 00_PM10 (ug/m3) | 10_PM10 (ug/m3) | 11_PM10 (ug/m3) | 12_PM10 (ug/m3) | 13_PM10 (ug/m3) | 14_PM10 (ug/m3) | 15_PM10 (ug/m3) | 17_PM10 (ug/m3) | 18_PM10 (ug/m3) | 20_PM10 (ug/m3) | ... | 22_PM10 (ug/m3) | 23_PM10 (ug/m3) | 26_PM10 (ug/m3) | 27_PM10 (ug/m3) | 03_PM10 (ug/m3) | 04_PM10 (ug/m3) | 05_PM10 (ug/m3) | 06_PM10 (ug/m3) | 08_PM10 (ug/m3) | 09_PM10 (ug/m3) | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2021-04-26 00:00:00+02:00 | 29.727500 | NaN | NaN | NaN | NaN | NaN | NaN | 11.201667 | NaN | 17.129667 | ... | 24.950000 | 13.100000 | NaN | NaN | NaN | NaN | NaN | NaN | 28.238167 | NaN |
| 2021-04-26 01:00:00+02:00 | 46.366250 | NaN | NaN | NaN | NaN | NaN | NaN | 23.050000 | NaN | 26.312500 | ... | 42.566667 | 21.341667 | NaN | NaN | NaN | NaN | NaN | NaN | 54.102000 | NaN |
| 2021-04-26 02:00:00+02:00 | 27.057500 | NaN | NaN | NaN | NaN | NaN | NaN | 33.475000 | NaN | 17.400167 | ... | 24.487500 | 12.475000 | NaN | NaN | 11.593083 | 22.622417 | 49.183333 | 34.000000 | 41.269333 | NaN |
| 2021-04-26 03:00:00+02:00 | 31.692500 | NaN | NaN | NaN | NaN | NaN | NaN | 87.671000 | NaN | 24.066000 | ... | 35.708333 | 17.091667 | NaN | NaN | 16.559167 | 36.227667 | 123.125000 | 64.166667 | 55.733333 | NaN |
| 2021-04-26 04:00:00+02:00 | 48.042500 | NaN | NaN | NaN | NaN | NaN | NaN | 66.350000 | NaN | 30.644833 | ... | 50.300000 | 24.866667 | NaN | NaN | 21.434417 | 42.075417 | 134.841667 | 50.666667 | 81.479667 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2021-10-29 19:00:00+02:00 | 5.688617 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2021-10-29 20:00:00+02:00 | 3.841533 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2021-10-29 21:00:00+02:00 | 6.940833 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2021-10-29 22:00:00+02:00 | 9.179317 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2021-10-29 23:00:00+02:00 | 6.118617 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
4488 rows × 21 columns
# Every PM10 column except the reference one, and how many there are.
pm10l = [i for i in df.columns if 'PM10' in i and i != '00_PM10 (ug/m3)']
len(pm10l)
20
# Overlay, in a single axes, the hourly means of each low-cost PM10 column
# (x) against the hourly reference PM10 (y).
Y = df['00_PM10 (ug/m3)'].resample('1H').mean().values
fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 1])
for p in (c for c in df.columns if 'PM10' in c and c != '00_PM10 (ug/m3)'):
    ax.scatter(df[p].resample('1H').mean().values, Y)
    print(p)
ax.set_ylabel('Reference PM10 (ug/m3)')
ax.set_xlabel('Low cost PM10 (ug/m3)')
ax.set_xlim([0, 200])
ax.set_ylim([0, 200])
10_PM10 (ug/m3) 11_PM10 (ug/m3) 12_PM10 (ug/m3) 13_PM10 (ug/m3) 14_PM10 (ug/m3) 15_PM10 (ug/m3) 17_PM10 (ug/m3) 18_PM10 (ug/m3) 20_PM10 (ug/m3) 21_PM10 (ug/m3) 22_PM10 (ug/m3) 23_PM10 (ug/m3) 26_PM10 (ug/m3) 27_PM10 (ug/m3) 03_PM10 (ug/m3) 04_PM10 (ug/m3) 05_PM10 (ug/m3) 06_PM10 (ug/m3) 08_PM10 (ug/m3) 09_PM10 (ug/m3)
(0.0, 200.0)
# Grid of hourly scatter plots: each low-cost PM10 column (x) against the
# reference PM10 (y), one panel per instrument, with a least-squares line.
lcvars = [i for i in df.columns if 'PM10' in i and i != '00_PM10 (ug/m3)']
# Four panels per row; at least the original five rows, more if there are
# over 20 PM10 columns so that panel indexing cannot run off the grid.
nrows = max(5, (len(lcvars) + 3) // 4)
fig, axs = plt.subplots(nrows, 4, sharex='col', sharey='row',
                        gridspec_kw={'hspace': 0.1, 'wspace': 0.1}, figsize=(12, 12))
for i, lcvar in enumerate(lcvars):
    panel = axs[divmod(i, 4)]
    # Hourly means, keeping only hours where both series have data.
    dftmp = df[['00_PM10 (ug/m3)', lcvar]].resample('1H').mean()
    dftmp = dftmp.dropna(how='any')
    X = dftmp[lcvar].values
    Y = dftmp['00_PM10 (ug/m3)'].values
    if len(X) > 0 and len(Y) > 0:
        panel.scatter(X, Y, alpha=0.25)
        panel.set_xlim(0, 200)
        panel.set_ylim(0, 200)
        res = linregress(X, Y)
        # Label is "y = slope*x + intercept (r)". The arguments used to be
        # passed as (intercept, slope), which mislabelled every panel.
        panel.text(0.3, 0.1,
                   "y={:.1f}x+{:.1f} ({:.2f})".format(res.slope, res.intercept, res.rvalue),
                   fontsize=10, transform=panel.transAxes)
        # Panel title is the two-digit instrument id.
        panel.set_title(lcvar.split("_")[0], y=1.0, pad=-14, fontsize=14)
        panel.plot(X, res.intercept + res.slope * X, 'r')
# Invisible full-figure axes that only carries the shared axis labels.
fig.add_subplot(111, frameon=False)
plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
plt.grid(False)
plt.xlabel("Low cost PM10 (ug/m3)", fontsize=16)
plt.ylabel("Reference PM10 (ug/m3)", fontsize=16)
#plt.savefig(Path(FigDir / '20210929_CorrMatrix_{}.png'.format('PM10')), dpi=90, bbox_inches='tight', pad_inches=0)
Text(0, 0.5, 'Reference PM10 (ug/m3)')
# Show the regression result left over from the last panel fitted above.
res
LinregressResult(slope=1.2480156117719676, intercept=22.544722211457255, rvalue=0.619076604079, pvalue=0.0, stderr=0.025338053235822267, intercept_stderr=0.9390292253697303)
# Show the list of low-cost PM10 columns used for the panels above.
lcvars
['10_PM10 (ug/m3)', '11_PM10 (ug/m3)', '12_PM10 (ug/m3)', '13_PM10 (ug/m3)', '14_PM10 (ug/m3)', '15_PM10 (ug/m3)', '17_PM10 (ug/m3)', '18_PM10 (ug/m3)', '20_PM10 (ug/m3)', '21_PM10 (ug/m3)', '22_PM10 (ug/m3)', '23_PM10 (ug/m3)', '26_PM10 (ug/m3)', '27_PM10 (ug/m3)', '03_PM10 (ug/m3)', '04_PM10 (ug/m3)', '05_PM10 (ug/m3)', '06_PM10 (ug/m3)', '08_PM10 (ug/m3)', '09_PM10 (ug/m3)']
# Order the columns alphabetically so instruments appear in id order.
df = df.reindex(sorted(df.columns), axis=1)
# For each pollutant, draw a grid of hourly scatter plots of every low-cost
# column (x) against the reference column "00_<pollutant>" (y).
Vars = ['PM10 (ug/m3)', 'PM2.5 (ug/m3)', 'O3 (ppb)', 'SO2 (ppb)', 'NO2 (ppb)', 'CO (ppm)']
for var in Vars:
    refvar = '00_{}'.format(var)
    # Every other column whose name contains the pollutant label.
    lcvars = [i for i in df.columns if var in i and i != refvar]
    # Four panels per row; at least the original six rows, more if needed.
    nrows = max(6, (len(lcvars) + 3) // 4)
    fig, axs = plt.subplots(nrows, 4, sharex='col', sharey='row',
                            gridspec_kw={'hspace': 0.1, 'wspace': 0.1}, figsize=(12, 12))
    for i, lcvar in enumerate(lcvars):
        panel = axs[divmod(i, 4)]
        # Hourly means, keeping only hours where both series have data.
        dftmp = df[[refvar, lcvar]].resample('1H').mean()
        dftmp = dftmp.dropna(how='any')
        X = dftmp[lcvar].values
        Y = dftmp[refvar].values
        if len(X) > 0 and len(Y) > 0:
            panel.scatter(X, Y, alpha=0.25)
            # Both axes span 0 .. maximum of the reference series.
            panel.set_xlim(0, dftmp[refvar].max())
            panel.set_ylim(0, dftmp[refvar].max())
            res = linregress(X, Y)
            # Label is "y = slope*x + intercept (r)". The arguments used to
            # be passed as (intercept, slope), which mislabelled every panel.
            panel.text(0.3, 0.1,
                       "y={:.1f}x+{:.1f} ({:.2f})".format(res.slope, res.intercept, res.rvalue),
                       fontsize=10, transform=panel.transAxes)
            # Panel title is the two-digit instrument id.
            panel.set_title(lcvar.split("_")[0], y=1.0, pad=-14, fontsize=14)
            panel.plot(X, res.intercept + res.slope * X, 'r')
    # Invisible full-figure axes that only carries the shared axis labels.
    fig.add_subplot(111, frameon=False)
    plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
    plt.grid(False)
    plt.xlabel("Low cost {}".format(var), fontsize=16)
    plt.ylabel("Reference {}".format(var), fontsize=16)
    #plt.savefig(Path(FigDir / '20210928_AllVars_CorrMatrix_{}.png'.format(var.split()[0])), dpi=90, bbox_inches='tight', pad_inches=0)
# Low-cost PM10 columns drawn in the diurnal plot below; instrument 06 is not
# in this list although the combined dataset has a 06_PM10 column.
Pvars = ["03_PM10 (ug/m3)",
"04_PM10 (ug/m3)",
"05_PM10 (ug/m3)",
"08_PM10 (ug/m3)",
"09_PM10 (ug/m3)",
"10_PM10 (ug/m3)",
"11_PM10 (ug/m3)",
"12_PM10 (ug/m3)",
"13_PM10 (ug/m3)",
"14_PM10 (ug/m3)",
"15_PM10 (ug/m3)",
"17_PM10 (ug/m3)",
"18_PM10 (ug/m3)",
"20_PM10 (ug/m3)",
"21_PM10 (ug/m3)",
"22_PM10 (ug/m3)",
"23_PM10 (ug/m3)",
"26_PM10 (ug/m3)",
"27_PM10 (ug/m3)"]
# Median diurnal cycle of PM10: the reference column first, then every
# low-cost column in Pvars, all in a single axes.
varname = "00_PM10 (ug/m3)"
fig, ax = plt.subplots(1, figsize=(12, 6))
for var in [varname] + Pvars:
    # Hourly means, tagged with their hour of day ("HH:00").
    dftmp = df[[var]].resample('1H').mean()
    dftmp['Time'] = dftmp.index.strftime("%H:00")
    # Summary statistics per hour of day; '50%' is the median.
    diurnal = dftmp.groupby('Time')[var].describe()
    ax.plot(diurnal.index, diurnal['50%'], linewidth=2.0, label=var, alpha=0.5)
ax.set_xlim(0, 23)
ax.set_xticks(list(range(0, 24, 3)))
plt.legend()
<matplotlib.legend.Legend at 0x7f4e3f6fee30>
# Time series of every low-cost PM10 column, one panel each.
pm10 = [i for i in df.columns if i != '00_PM10 (ug/m3)' and 'PM10' in i]
df.loc[:, pm10].plot(subplots=True, figsize=(20, 20))
array([<AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>,
<AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>,
<AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>,
<AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>,
<AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>],
dtype=object)
# For each pollutant, a grid of median diurnal cycles: every low-cost column
# is drawn together with the reference column "00_<pollutant>".
Vars = ['PM10 (ug/m3)', 'PM2.5 (ug/m3)', 'O3 (ppb)', 'SO2 (ppb)', 'NO2 (ppb)', 'CO (ppm)']
for var in Vars:
    refvar = '00_{}'.format(var)
    # Reference statistics per hour of day ('50%' is the median).
    dftmp = df[[refvar]].resample('5min').mean()
    dftmp['Time'] = dftmp.index.strftime("%H:00")
    diurnalref = dftmp.groupby('Time')[refvar].describe()
    lcvars = [i for i in df.columns if var in i and i != refvar]
    fig, axs = plt.subplots(6, 4, sharex='col', sharey='row',
                            gridspec_kw={'hspace': 0.1, 'wspace': 0.1}, figsize=(12, 12))
    for i, lcvar in enumerate(lcvars):
        panel = axs[i // 4, i % 4]
        dftmp = df[[lcvar]].resample('5min').mean()
        dftmp['Time'] = dftmp.index.strftime("%H:00")
        diurnal = dftmp.groupby('Time')[lcvar].describe()
        # Low-cost series first, reference second (as stated in the y label).
        panel.plot(diurnal.index, diurnal['50%'], linewidth=2.0, label=var, alpha=0.5)
        panel.plot(diurnalref.index, diurnalref['50%'], linewidth=2.0, label='Reference', alpha=0.5)
        # Panel title is the two-digit instrument id.
        panel.set_title(lcvar.split("_")[0], y=1.0, pad=-14, fontsize=14)
    # x range and ticks are set on row 4; the x axis is shared per column.
    for i in range(4):
        axs[4, i].set_xlim(0, 23)
        axs[4, i].set_xticks(list(range(0, 24, 6)))
    # Invisible full-figure axes that only carries the shared axis labels.
    fig.add_subplot(111, frameon=False)
    plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
    plt.grid(False)
    plt.xlabel("Time of day", fontsize=16)
    plt.ylabel("Reference (orange) vs low-cost (blue) {}".format(var), fontsize=16)
    #plt.savefig(Path(FigDir / '20210928_Diurnal_{}.png'.format(var.split()[0])), dpi=90, bbox_inches='tight', pad_inches=0)
# List the column names of the combined dataset.
df.columns
Index(['00_Ambient_Temp (degC)', '00_Atmospheric_Pressure (hPa)',
'00_CO (ppm)', '00_Horiba_SO2 (ppb)', '00_Internal_Temp (degC)',
'00_Line_Voltage (V)', '00_NO (ppb)', '00_NO2 (ppb)', '00_NOx (ppb)',
'00_O3 (ppb)',
...
'27_NO (ppb)', '27_NO2 (ppb)', '27_O3 (ppb)', '27_PM10 (ug/m3)',
'27_PM2.5 (ug/m3)', '27_Relative_Humidity (%)', '27_SO2 (ppb)',
'PM_large', 'PM_small', 'date_ordinal'],
dtype='object', length=263)
# Data availability: for every instrument and pollutant, the percentage of
# rows of df that hold a value (one decimal); "" where the column is absent.
Vars = ['PM10 (ug/m3)', 'PM2.5 (ug/m3)', 'O3 (ppb)', 'SO2 (ppb)', 'NO2 (ppb)', 'CO (ppm)']
# NOTE(review): this list has no '18' although the correlation table further
# down includes it, and 24/25 have no matching columns - confirm.
Instruments = [
    '00', '01', '02', '03', '04', '05', '06', '07',
    '08', '09', '10', '11', '12', '13', '14', '15',
    '17', '20', '21', '22', '23', '24', '25', '26', '27',
]
Table = {}
for i in Instruments:
    inst = []
    for var in Vars:
        varname = '{}_{}'.format(i, var)
        inst.append(
            "{:.1f}".format(df[varname].count() / len(df[varname]) * 100)
            if varname in df.columns
            else ""
        )
    Table[i] = inst
Table
{'00': ['99.1', '99.5', '58.8', '78.3', '98.9', '99.3'],
'01': ['', '50.4', '', '', '', ''],
'02': ['', '70.1', '', '', '', ''],
'03': ['88.0', '88.0', '88.0', '', '88.0', '61.3'],
'04': ['51.8', '51.8', '51.8', '', '51.8', '38.9'],
'05': ['59.8', '59.8', '59.9', '59.9', '', '59.9'],
'06': ['43.6', '43.5', '', '', '', ''],
'07': ['', '', '', '', '', '26.3'],
'08': ['97.9', '95.2', '86.7', '78.3', '93.0', '97.4'],
'09': ['81.5', '81.5', '81.5', '81.5', '81.5', '81.5'],
'10': ['76.9', '76.9', '76.9', '76.9', '76.9', '76.9'],
'11': ['88.3', '88.3', '88.3', '88.3', '88.3', '88.3'],
'12': ['90.4', '90.4', '90.4', '90.4', '90.4', '90.4'],
'13': ['90.5', '90.5', '90.5', '90.5', '90.5', '90.5'],
'14': ['90.0', '90.0', '90.0', '90.0', '90.0', '90.0'],
'15': ['90.5', '90.5', '90.5', '90.5', '90.5', ''],
'17': ['90.0', '90.1', '90.1', '90.1', '90.1', '90.1'],
'20': ['97.2', '97.2', '79.2', '', '96.3', '97.0'],
'21': ['96.2', '96.2', '92.5', '95.5', '90.6', '94.4'],
'22': ['69.2', '69.2', '', '', '', ''],
'23': ['79.7', '79.7', '', '', '', ''],
'24': ['', '', '', '', '', ''],
'25': ['', '', '', '', '', ''],
'26': ['51.3', '51.3', '48.7', '48.6', '49.6', '36.7'],
'27': ['42.8', '42.1', '31.6', '42.3', '42.8', '42.4']}
# Turn the table into a frame: one row per instrument, one column per pollutant.
dfDA = pd.DataFrame.from_dict(Table, orient='index', columns=Vars)
dfDA
| PM10 (ug/m3) | PM2.5 (ug/m3) | O3 (ppb) | SO2 (ppb) | NO2 (ppb) | CO (ppm) | |
|---|---|---|---|---|---|---|
| 00 | 99.1 | 99.5 | 58.8 | 78.3 | 98.9 | 99.3 |
| 01 | 50.4 | |||||
| 02 | 70.1 | |||||
| 03 | 88.0 | 88.0 | 88.0 | 88.0 | 61.3 | |
| 04 | 51.8 | 51.8 | 51.8 | 51.8 | 38.9 | |
| 05 | 59.8 | 59.8 | 59.9 | 59.9 | 59.9 | |
| 06 | 43.6 | 43.5 | ||||
| 07 | 26.3 | |||||
| 08 | 97.9 | 95.2 | 86.7 | 78.3 | 93.0 | 97.4 |
| 09 | 81.5 | 81.5 | 81.5 | 81.5 | 81.5 | 81.5 |
| 10 | 76.9 | 76.9 | 76.9 | 76.9 | 76.9 | 76.9 |
| 11 | 88.3 | 88.3 | 88.3 | 88.3 | 88.3 | 88.3 |
| 12 | 90.4 | 90.4 | 90.4 | 90.4 | 90.4 | 90.4 |
| 13 | 90.5 | 90.5 | 90.5 | 90.5 | 90.5 | 90.5 |
| 14 | 90.0 | 90.0 | 90.0 | 90.0 | 90.0 | 90.0 |
| 15 | 90.5 | 90.5 | 90.5 | 90.5 | 90.5 | |
| 17 | 90.0 | 90.1 | 90.1 | 90.1 | 90.1 | 90.1 |
| 20 | 97.2 | 97.2 | 79.2 | 96.3 | 97.0 | |
| 21 | 96.2 | 96.2 | 92.5 | 95.5 | 90.6 | 94.4 |
| 22 | 69.2 | 69.2 | ||||
| 23 | 79.7 | 79.7 | ||||
| 24 | ||||||
| 25 | ||||||
| 26 | 51.3 | 51.3 | 48.7 | 48.6 | 49.6 | 36.7 |
| 27 | 42.8 | 42.1 | 31.6 | 42.3 | 42.8 | 42.4 |
#dfDA.to_csv(Path(TabDir / '20210829_DataAvailability.csv'),float_format='%.1f')
# Regression of the reference (y) on each low-cost column (x), using hourly
# means. Each cell reads "intercept, slope, r"; it is left empty when the
# column is absent or when 10 or fewer paired hours are available.
Vars = ['PM10 (ug/m3)', 'PM2.5 (ug/m3)', 'O3 (ppb)', 'SO2 (ppb)', 'NO2 (ppb)', 'CO (ppm)']
Instruments = [
    '01', '02', '03', '04', '05', '06', '07', '08',
    '09', '10', '11', '12', '13', '14', '15', '17',
    '18', '20', '21', '22', '23', '26', '27',
]
Table = {}
for i in Instruments:
    inst = []
    for var in Vars:
        refvar = '00_{}'.format(var)
        varname = '{}_{}'.format(i, var)
        cell = ""
        if varname in df.columns:
            # Keep only hours where both series have data.
            dftmp = df[[refvar, varname]].resample('1H').mean().dropna(how='any')
            if len(dftmp) > 10:
                X = dftmp[varname].values
                Y = dftmp[refvar].values
                res = linregress(X, Y)
                cell = "{:.1f}, {:.1f}, {:.2f}".format(res.intercept, res.slope, res.rvalue)
        inst.append(cell)
    Table[i] = inst
# One row per instrument, one column per pollutant.
dfDA = pd.DataFrame(data=Table).T
dfDA.columns = Vars
dfDA
| PM10 (ug/m3) | PM2.5 (ug/m3) | O3 (ppb) | SO2 (ppb) | NO2 (ppb) | CO (ppm) | |
|---|---|---|---|---|---|---|
| 01 | 10.0, 0.7, 0.87 | |||||
| 02 | 11.1, 0.6, 0.85 | |||||
| 03 | 47.8, 0.8, 0.32 | 13.1, 1.6, 0.79 | 28.9, 0.0, 0.14 | 12.6, 0.1, 0.06 | 0.1, 1.2, 0.96 | |
| 04 | 56.8, 0.1, 0.07 | 25.9, 0.5, 0.45 | 7.6, 0.8, 0.81 | 11.8, 0.2, 0.28 | -0.0, 1.3, 0.96 | |
| 05 | 65.3, 0.0, 0.04 | 26.6, 1.9, 0.32 | 21.0, 0.2, 0.27 | -0.8, 0.2, 0.48 | 0.1, 1.2, 0.96 | |
| 06 | 30.2, 0.0, 0.18 | 23.7, 0.0, 0.08 | ||||
| 07 | 0.4, 0.2, 0.22 | |||||
| 08 | 50.8, 0.1, 0.23 | 20.4, 0.4, 0.63 | 16.5, 0.7, 0.43 | -0.4, 1.7, 0.45 | 7.5, 0.2, 0.30 | -0.2, 1.7, 0.94 |
| 09 | 22.5, 1.2, 0.62 | 2.6, 1.2, 0.93 | 11.2, 0.6, 0.52 | 5.0, 0.0, 0.24 | 20.1, -0.0, -0.39 | 1.6, -0.3, -0.37 |
| 10 | 25.2, 0.8, 0.55 | 8.1, 0.7, 0.83 | 15.5, 0.9, 0.48 | 3.6, 0.0, 0.22 | 23.4, -0.0, -0.31 | -1.9, 0.4, 0.46 |
| 11 | 20.5, 1.1, 0.64 | 2.4, 1.0, 0.94 | 14.0, 0.1, 0.44 | 7.0, 0.0, 0.05 | 19.5, -0.0, -0.35 | 0.6, -0.0, -0.09 |
| 12 | 20.9, 1.2, 0.63 | 2.5, 1.1, 0.93 | 13.7, 0.1, 0.50 | 7.7, 0.0, 0.02 | 20.4, -0.0, -0.39 | 0.5, 0.2, 0.02 |
| 13 | 21.6, 1.0, 0.62 | 3.1, 0.9, 0.94 | -2.0, 0.4, 0.44 | 6.6, 0.0, 0.06 | 22.5, -0.0, -0.38 | 0.2, 0.1, 0.20 |
| 14 | 21.4, 1.6, 0.65 | 3.2, 1.5, 0.93 | -14.8, 0.9, 0.43 | 7.4, 0.0, 0.04 | 20.6, -0.0, -0.38 | 0.5, 0.0, 0.02 |
| 15 | 23.7, 0.6, 0.66 | 7.0, 0.5, 0.91 | 17.9, 0.9, 0.76 | 7.5, 0.5, 0.26 | 10.0, 0.1, 0.17 | |
| 17 | 27.2, 1.5, 0.61 | 5.7, 5.8, 0.90 | 37.7, 0.8, 0.86 | 12.2, 0.1, 0.16 | 10.1, 0.2, 0.28 | 0.9, 1.1, 0.91 |
| 18 | 29.9, 0.7, 0.59 | 5.8, 0.7, 0.91 | ||||
| 20 | -0.8, 1.7, 0.63 | 1.3, 0.9, 0.68 | 10.5, 0.9, 0.92 | 3.8, 0.7, 0.58 | 0.1, 0.6, 0.94 | |
| 21 | 2.6, 1.2, 0.67 | -2.9, 0.8, 0.84 | 7.6, 0.9, 0.93 | -5.3, 1.0, 0.24 | 6.4, 0.7, 0.54 | 0.1, 0.6, 0.94 |
| 22 | 24.5, 0.9, 0.61 | 3.2, 0.9, 0.93 | ||||
| 23 | 31.7, 1.3, 0.59 | 8.8, 1.0, 0.93 | ||||
| 26 | 13.5, 1.4, 0.57 | -7.2, 1.5, 0.92 | 12.5, 1.0, 0.92 | 11.2, -0.4, -0.13 | 6.9, 0.4, 0.33 | 0.3, 0.0, 0.01 |
| 27 | 41.3, 0.4, 0.34 | 14.6, 0.7, 0.69 | 28.5, 0.1, 0.09 | 2.1, 0.8, 0.28 | -8.2, 0.7, 0.41 | -0.1, 1.4, 0.84 |
#dfDA.to_csv(Path(TabDir / '20210928_CorrelationWithReference.csv'))
# Summary statistics per instrument and pollutant; each cell reads
# "mean, standard deviation, 99th percentile"; "" where the column is absent.
Vars = ['PM10 (ug/m3)', 'PM2.5 (ug/m3)', 'O3 (ppb)', 'SO2 (ppb)', 'NO2 (ppb)', 'CO (ppm)']
Instruments = [
    '00', '01', '02', '03', '04', '05', '06', '07',
    '08', '09', '10', '11', '12', '13', '14', '15',
    '17', '20', '21', '22', '23', '24', '25', '26', '27',
]
Table = {}
for i in Instruments:
    inst = []
    for var in Vars:
        varname = '{}_{}'.format(i, var)
        inst.append(
            "{:.1f}, {:.1f}, {:.1f}".format(
                df[varname].mean(), df[varname].std(), df[varname].quantile(0.99)
            )
            if varname in df.columns
            else ""
        )
    Table[i] = inst
# One row per instrument, one column per pollutant.
dfDA = pd.DataFrame(data=Table).T
dfDA.columns = Vars
#dfDA.to_csv(Path(TabDir / '20210829_DataStatistics.csv'))
# Build a per-instrument table of "mean (standard deviation)" for every
# pollutant column that exists in df, then display it.
Vars = [
    'PM10 (ug/m3)', 'PM2.5 (ug/m3)', 'O3 (ppb)',
    'SO2 (ppb)', 'NO2 (ppb)', 'CO (ppm)',
]
# Two-digit instrument identifiers; they prefix the column names as "<id>_<variable>".
Instruments = [
    '00', '01', '02', '03', '04', '05', '06', '07', '08', '09',
    '10', '11', '12', '13', '14', '15', '17',
    '20', '21', '22', '23', '24', '25', '26', '27',
]
Table = {}
for i in Instruments:
    inst = []
    for var in Vars:
        varname = f'{i}_{var}'
        if varname not in df.columns:
            # Instrument does not measure this variable: leave the cell blank.
            inst.append("")
            continue
        series = df[varname]
        inst.append(f"{series.mean():.1f} ({series.std():.1f})")
        # Debug aid (data availability in percent):
        #print(varname,"{:.1f}".format(len(df[varname].dropna())/len(df[varname])*100))
    Table[i] = inst
# One row per instrument, one column per variable.
dfDA = pd.DataFrame(Table).T
dfDA.columns = Vars
#dfDA.to_csv(Path(TabDir / '20210928_DataMeanSTD.csv'))
dfDA
| PM10 (ug/m3) | PM2.5 (ug/m3) | O3 (ppb) | SO2 (ppb) | NO2 (ppb) | CO (ppm) | |
|---|---|---|---|---|---|---|
| 00 | 56.6 (57.7) | 30.3 (25.7) | 24.6 (18.1) | 8.0 (15.4) | 12.8 (9.7) | 0.5 (0.7) |
| 01 | 28.6 (32.2) | |||||
| 02 | 30.5 (34.2) | |||||
| 03 | 13.7 (22.7) | 11.4 (13.3) | -846.8 (473.9) | 4.6 (6.5) | 0.5 (0.6) | |
| 04 | 23.0 (60.3) | 15.4 (25.6) | 19.1 (16.9) | 15.2 (12.9) | 0.6 (0.7) | |
| 05 | 82.9 (300.8) | 4.2 (4.8) | 24.6 (22.4) | 58.5 (46.3) | 0.4 (0.6) | |
| 06 | 2551.0 (944.6) | 1384.6 (545.0) | ||||
| 07 | 1.2 (0.8) | |||||
| 08 | 56.7 (107.1) | 26.5 (40.9) | 21.8 (15.7) | 5.5 (4.0) | 28.9 (14.8) | 0.4 (0.4) |
| 09 | 28.5 (24.7) | 24.8 (20.7) | 27.0 (18.7) | 122.3 (144.3) | 608.6 (345.7) | 3.5 (0.9) |
| 10 | 34.0 (31.5) | 29.4 (35.9) | 8.4 (11.4) | 136.4 (127.8) | 336.2 (105.6) | 6.4 (0.8) |
| 11 | 31.7 (27.4) | 27.3 (23.3) | 92.7 (66.3) | 1129.5 (820.6) | 874.7 (438.8) | 1.4 (2.3) |
| 12 | 29.5 (25.5) | 25.5 (21.6) | 95.2 (81.8) | 1282.7 (873.2) | 889.1 (439.9) | 0.0 (0.1) |
| 13 | 35.0 (30.3) | 29.2 (25.6) | 77.5 (24.1) | 929.4 (558.4) | 479.8 (177.4) | 2.3 (1.0) |
| 14 | 22.1 (19.7) | 18.6 (16.0) | 47.2 (11.1) | 3717.1 (2830.5) | 944.2 (434.3) | 2.9 (2.3) |
| 15 | 56.6 (54.4) | 44.2 (43.1) | 14.8 (26.0) | 1.0 (8.1) | 27.1 (17.2) | |
| 17 | 21.7 (21.7) | 4.5 (4.1) | -13.9 (21.3) | -55.6 (34.0) | 20.9 (17.8) | -0.4 (0.6) |
| 20 | 33.8 (19.2) | 31.0 (19.2) | 23.1 (19.7) | 13.0 (8.2) | 0.7 (1.0) | |
| 21 | 44.8 (27.6) | 43.6 (27.8) | 24.0 (20.9) | 13.0 (3.6) | 10.0 (7.7) | 0.6 (1.0) |
| 22 | 35.5 (33.0) | 29.3 (25.3) | ||||
| 23 | 22.2 (23.8) | 22.1 (23.6) | ||||
| 24 | ||||||
| 25 | ||||||
| 26 | 33.5 (21.1) | 24.6 (13.4) | 24.8 (22.8) | 2.9 (4.6) | 10.1 (6.7) | 7.2 (2.5) |
| 27 | 34.1 (44.2) | 18.5 (20.2) | 37.4 (28.3) | 7.7 (4.5) | 25.7 (4.9) | 0.4 (0.3) |
# List every column whose name starts with the '00' instrument prefix.
reference_columns = [name for name in df.columns if name.startswith('00')]
for name in reference_columns:
    print(name)
00_Ambient_Temp (degC) 00_Atmospheric_Pressure (hPa) 00_CO (ppm) 00_Horiba_SO2 (ppb) 00_Internal_Temp (degC) 00_Line_Voltage (V) 00_NO (ppb) 00_NO2 (ppb) 00_NOx (ppb) 00_O3 (ppb) 00_PM10 (ug/m3) 00_PM2.5 (ug/m3) 00_Rain (mm) 00_Relative_Humidity (%) 00_SO2 (ppb) 00_Solar_Radiation (W/m2) 00_Wind_Direction (deg) 00_Wind_Speed (m/s)
# Time series of relative humidity and ambient temperature for instrument 00,
# one panel per variable.
met_columns = ['00_Relative_Humidity (%)', '00_Ambient_Temp (degC)']
df[met_columns].plot(subplots=True, figsize=(12, 12))
#plt.savefig(Path(FigDir / 'Ref_T_and_RH.png'), dpi=90, bbox_inches='tight', pad_inches=0)
array([<AxesSubplot:>, <AxesSubplot:>], dtype=object)
# Time series of SO2 for instrument 00 (kept as a one-column frame so that
# subplots=True still returns an array of axes).
so2_frame = df[['00_SO2 (ppb)']]
so2_frame.plot(subplots=True, figsize=(12, 12))
#plt.savefig(Path(FigDir / '20210928_Ref_SO2.png'), dpi=90, bbox_inches='tight', pad_inches=0)
array([<AxesSubplot:>], dtype=object)
# Time series of the six pollutant columns of instrument 00, one panel each.
pollutant_columns = [
    '00_PM10 (ug/m3)',
    '00_PM2.5 (ug/m3)',
    '00_SO2 (ppb)',
    '00_CO (ppm)',
    '00_O3 (ppb)',
    '00_NO2 (ppb)',
]
df[pollutant_columns].plot(subplots=True, figsize=(12, 12))
#plt.savefig(Path(FigDir / '20210928_Ref_aqs.png'), dpi=90, bbox_inches='tight', pad_inches=0)
array([<AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>,
<AxesSubplot:>, <AxesSubplot:>], dtype=object)
# Variable list for instrument 00 with the 'Time' label already included
# (the same final list that appending 'Time' in place produces).
VarNames = [
    '00_PM10 (ug/m3)',
    '00_PM2.5 (ug/m3)',
    '00_O3 (ppb)',
    '00_SO2 (ppb)',
    '00_NO2 (ppb)',
    '00_CO (ppm)',
    'Time',
]
# Display a new list with one more 'Time' entry; VarNames itself is not modified.
[*VarNames, 'Time']
['00_PM10 (ug/m3)', '00_PM2.5 (ug/m3)', '00_O3 (ppb)', '00_SO2 (ppb)', '00_NO2 (ppb)', '00_CO (ppm)', 'Time', 'Time']
# Display the columns currently listed in VarNames.
df.loc[:, VarNames]
| 00_Relative_Humidity (%) | 00_Ambient_Temp (degC) | |
|---|---|---|
| 2021-04-26 00:00:00+02:00 | 94.7523 | 6.7870 |
| 2021-04-26 00:10:00+02:00 | 93.2106 | 6.7430 |
| 2021-04-26 00:20:00+02:00 | 93.8100 | 6.8754 |
| 2021-04-26 00:30:00+02:00 | 94.8399 | 6.6430 |
| 2021-04-26 00:40:00+02:00 | 95.3269 | 6.1009 |
| ... | ... | ... |
| 2021-10-29 23:10:00+02:00 | 93.9168 | 15.5931 |
| 2021-10-29 23:20:00+02:00 | 92.9653 | 15.4031 |
| 2021-10-29 23:30:00+02:00 | 91.9792 | 15.5470 |
| 2021-10-29 23:40:00+02:00 | 91.0115 | 15.6706 |
| 2021-10-29 23:50:00+02:00 | 92.2838 | 15.6385 |
26928 rows × 2 columns
# Diurnal statistics for instrument 00: resample to 10-minute means, label each
# sample with its hour of day ("HH:00"), and describe every variable per hour.
VarNames = [
    '00_PM10 (ug/m3)',
    '00_PM2.5 (ug/m3)',
    '00_O3 (ppb)',
    '00_SO2 (ppb)',
    '00_NO2 (ppb)',
    '00_CO (ppm)',
]
dftmp = df[VarNames].resample('10min').mean()
dftmp['Time'] = dftmp.index.strftime("%H:00")
diurnal = dftmp.groupby('Time')[VarNames].describe()
# Show the hourly median of PM10.
pm10_stats = diurnal['00_PM10 (ug/m3)']
pm10_stats['50%']
Time 00:00 47.22250 01:00 46.20000 02:00 43.01000 03:00 40.99750 04:00 39.75710 05:00 40.57625 06:00 42.14500 07:00 40.87875 08:00 53.70750 09:00 58.51375 10:00 47.37000 11:00 37.04250 12:00 31.08000 13:00 31.23250 14:00 30.88375 15:00 31.01125 16:00 32.03000 17:00 39.00375 18:00 49.57250 19:00 52.10125 20:00 55.27250 21:00 54.22250 22:00 52.56750 23:00 50.45000 Name: 50%, dtype: float64
# Print the hourly median (50th percentile) profile of every variable in
# VarNames, prefixed with its position in the list.
VarNames = ['00_PM10 (ug/m3)', '00_PM2.5 (ug/m3)', '00_O3 (ppb)', '00_SO2 (ppb)', '00_NO2 (ppb)', '00_CO (ppm)']
for row, var in enumerate(VarNames):
    # Index with the variable of the current iteration. The previous version
    # used a stale global `i` here, so every row printed the same profile.
    print(row, diurnal[var]['50%'].values)
0 [30.16625 30.26375 29.75 29.35375 29.09625 30.83875 30.73875 29.57875 33.375 29.65 22.2475 17.03 14.475 13.115 12.77625 12.875 13.18875 15.695 20.9875 26.38875 29.8475 29.4425 32.35625 32.97125] 1 [30.16625 30.26375 29.75 29.35375 29.09625 30.83875 30.73875 29.57875 33.375 29.65 22.2475 17.03 14.475 13.115 12.77625 12.875 13.18875 15.695 20.9875 26.38875 29.8475 29.4425 32.35625 32.97125] 2 [30.16625 30.26375 29.75 29.35375 29.09625 30.83875 30.73875 29.57875 33.375 29.65 22.2475 17.03 14.475 13.115 12.77625 12.875 13.18875 15.695 20.9875 26.38875 29.8475 29.4425 32.35625 32.97125] 3 [30.16625 30.26375 29.75 29.35375 29.09625 30.83875 30.73875 29.57875 33.375 29.65 22.2475 17.03 14.475 13.115 12.77625 12.875 13.18875 15.695 20.9875 26.38875 29.8475 29.4425 32.35625 32.97125] 4 [30.16625 30.26375 29.75 29.35375 29.09625 30.83875 30.73875 29.57875 33.375 29.65 22.2475 17.03 14.475 13.115 12.77625 12.875 13.18875 15.695 20.9875 26.38875 29.8475 29.4425 32.35625 32.97125] 5 [30.16625 30.26375 29.75 29.35375 29.09625 30.83875 30.73875 29.57875 33.375 29.65 22.2475 17.03 14.475 13.115 12.77625 12.875 13.18875 15.695 20.9875 26.38875 29.8475 29.4425 32.35625 32.97125]
# Diurnal profile of the six pollutant columns of instrument 00: one panel per
# variable showing the hourly median with the 25%-75% band shaded.
VarNames = [
    '00_PM10 (ug/m3)',
    '00_PM2.5 (ug/m3)',
    '00_O3 (ppb)',
    '00_SO2 (ppb)',
    '00_NO2 (ppb)',
    '00_CO (ppm)',
]
fig, axs = plt.subplots(
    len(VarNames), 1,
    sharex='col', sharey='row',
    gridspec_kw={'hspace': 0.1, 'wspace': 0.1},
    figsize=(12, 12),
)

# Hour-of-day statistics computed from 5-minute means.
dftmp = df[VarNames].resample('5min').mean()
dftmp['Time'] = dftmp.index.strftime("%H:00")
diurnal = dftmp.groupby('Time')[VarNames].describe()
hours = diurnal.index

for ax, var in zip(axs, VarNames):
    stats = diurnal[var]
    q25, q50, q75 = stats['25%'], stats['50%'], stats['75%']
    # Median line plus the two quartile lines.
    ax.plot(hours, q50.values, linewidth=2.0, label=var, alpha=0.5, color='b')
    ax.plot(hours, q75, color='b')
    ax.plot(hours, q25, color='b')
    # Shade between the median and each quartile.
    ax.fill_between(hours, q50, q75, alpha=.25, facecolor='b')
    ax.fill_between(hours, q50, q25, alpha=.25, facecolor='b')
    ax.set_ylabel(var.replace("00_", ""))
    # The x values are the "HH:00" labels, so positions 0..23 span the day;
    # put a tick on every third hour.
    ax.set_xlim(0, 23)
    ax.set_xticks(list(range(0, 24, 3)))

# Invisible full-figure axes used only to carry the shared x-axis label.
fig.add_subplot(111, frameon=False)
plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
plt.grid(False)
plt.xlabel("Time of day", fontsize=16)
#plt.savefig(Path(FigDir / '20210928_Diurnal_refvars.png'), dpi=90, bbox_inches='tight', pad_inches=0)
Text(0.5, 0, 'Time of day')
# Column names for relative humidity and ambient temperature of instrument 00.
('00_Relative_Humidity (%)', '00_Ambient_Temp (degC)')
('00_Relative_Humidity (%)', '00_Ambient_Temp (degC)')
# Diurnal profile of relative humidity and ambient temperature for instrument
# 00: one panel per variable showing the hourly median with the 25%-75% band.
VarNames = ['00_Relative_Humidity (%)', '00_Ambient_Temp (degC)']
fig, axs = plt.subplots(
    len(VarNames), 1,
    sharex='col', sharey='row',
    gridspec_kw={'hspace': 0.1, 'wspace': 0.1},
    figsize=(12, 6),
)

# Hour-of-day statistics computed from 5-minute means.
dftmp = df[VarNames].resample('5min').mean()
dftmp['Time'] = dftmp.index.strftime("%H:00")
diurnal = dftmp.groupby('Time')[VarNames].describe()
hours = diurnal.index

for ax, var in zip(axs, VarNames):
    stats = diurnal[var]
    q25, q50, q75 = stats['25%'], stats['50%'], stats['75%']
    # Median line plus the two quartile lines.
    ax.plot(hours, q50.values, linewidth=2.0, label=var, alpha=0.5, color='b')
    ax.plot(hours, q75, color='b')
    ax.plot(hours, q25, color='b')
    # Shade between the median and each quartile.
    ax.fill_between(hours, q50, q75, alpha=.25, facecolor='b')
    ax.fill_between(hours, q50, q25, alpha=.25, facecolor='b')
    ax.set_ylabel(var.replace("00_", ""))
    # The x values are the "HH:00" labels, so positions 0..23 span the day;
    # put a tick on every third hour.
    ax.set_xlim(0, 23)
    ax.set_xticks(list(range(0, 24, 3)))

# Invisible full-figure axes used only to carry the shared x-axis label.
fig.add_subplot(111, frameon=False)
plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
plt.grid(False)
plt.xlabel("Time of day", fontsize=16)
#plt.savefig(Path(FigDir / '20210928_Diurnal_refTandRH.png'), dpi=90, bbox_inches='tight', pad_inches=0)
Text(0.5, 0, 'Time of day')